Introduction

The goal of the project is to derive insights on the TMDB movie datset and perform regression models to predict revenue of the movie. This model could be leveraged by production companies for making go/no-go screening decisions.

TMDB Movie Dataset available on Kaggle. Link: https://www.kaggle.com/tmdb/tmdb-movie-metadata

Load Packages
library(plyr) #data manipulation
library(tidyverse)# data manipulation
library(formattable)# table
library(splitstackshape) # split columns
library(jsonlite) #JSON format 
library(wordcloud) #wordcloud
library(RColorBrewer) # Color Theme
library(ggthemes) #Themes for plot
library(tm) # Sentiment Analysis 
library(RSentiment) # Sentiment Analysis
library(zoo) # Time 
library(stringr) #String Manipulation
library(ggplot2)
library(VIM)
library(mice)
library(vcd)
require(car)
library(tabplot)
library(PerformanceAnalytics)
library(MASS)
library(glmnet)
library(dplyr)
Load TMDB datset
movie=read_csv("tmdb_5000_movies.csv",col_names=TRUE,na="NA")
credit=read_csv("tmdb_5000_credits.csv",col_names=TRUE,na="NA")

The Movie and credit data contain columns of nested JSON which need to be split into separate columns for accurate analysis. In the movie dataset, popularity means the number of views of the movie in the website and vote_average tells us about the movie rating.

glimpse(movie)
Observations: 4,803
Variables: 20
$ budget               <int> 237000000, 300000000, 245000000, 250000000, 260000000, 258000000, 260000000, 280000000, 250000000, 25...
$ genres               <chr> "[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \...
$ homepage             <chr> "http://www.avatarmovie.com/", "http://disney.go.com/disneypictures/pirates/", "http://www.sonypictur...
$ id                   <int> 19995, 285, 206647, 49026, 49529, 559, 38757, 99861, 767, 209112, 1452, 10764, 58, 57201, 49521, 2454...
$ keywords             <chr> "[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\": 2964, \"name\": \"future\"}, {\"id\": 3386, \...
$ original_language    <chr> "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en",...
$ original_title       <chr> "Avatar", "Pirates of the Caribbean: At World's End", "Spectre", "The Dark Knight Rises", "John Carte...
$ overview             <chr> "In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but ...
$ popularity           <dbl> 150.437577, 139.082615, 107.376788, 112.312950, 43.926995, 115.699814, 48.681969, 134.279229, 98.8856...
$ production_companies <chr> "[{\"name\": \"Ingenious Film Partners\", \"id\": 289}, {\"name\": \"Twentieth Century Fox Film Corpo...
$ production_countries <chr> "[{\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}, {\"iso_3166_1\": \"GB\", \"name\"...
$ release_date         <date> 2009-12-10, 2007-05-19, 2015-10-26, 2012-07-16, 2012-03-07, 2007-05-01, 2010-11-24, 2015-04-22, 2009...
$ revenue              <dbl> 2787965087, 961000000, 880674609, 1084939099, 284139100, 890871626, 591794936, 1405403694, 933959197,...
$ runtime              <int> 162, 169, 148, 165, 132, 139, 100, 141, 153, 151, 154, 106, 151, 149, 143, 150, 143, 136, 106, 144, 1...
$ spoken_languages     <chr> "[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"...
$ status               <chr> "Released", "Released", "Released", "Released", "Released", "Released", "Released", "Released", "Rele...
$ tagline              <chr> "Enter the World of Pandora.", "At the end of the world, the adventure begins.", "A Plan No One Escap...
$ title                <chr> "Avatar", "Pirates of the Caribbean: At World's End", "Spectre", "The Dark Knight Rises", "John Carte...
$ vote_average         <dbl> 7.2, 6.9, 6.3, 7.6, 6.1, 5.9, 7.4, 7.3, 7.4, 5.7, 5.4, 6.1, 7.0, 5.9, 6.5, 6.3, 7.4, 6.4, 6.2, 7.1, 6...
$ vote_count           <int> 11800, 4500, 4466, 9106, 2124, 3576, 3330, 6767, 5293, 7004, 1400, 2965, 5246, 2311, 6359, 1630, 1177...
glimpse(credit)
Observations: 4,803
Variables: 4
$ movie_id <int> 19995, 285, 206647, 49026, 49529, 559, 38757, 99861, 767, 209112, 1452, 10764, 58, 57201, 49521, 2454, 24428, 186...
$ title    <chr> "Avatar", "Pirates of the Caribbean: At World's End", "Spectre", "The Dark Knight Rises", "John Carter", "Spider-...
$ cast     <chr> "[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"credit_id\": \"5602a8a7c3a3685532001c9a\", \"gender\": 2, \...
$ crew     <chr> "[{\"credit_id\": \"52fe48009251416c750aca23\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1721, \"job\"...

1. Data Exploratory Analysis

genredf=movie %>% filter(nchar(genres)>2) %>% mutate(js=lapply(genres,fromJSON)) %>% unnest(js) %>% dplyr::select(id,title,genre=name) #Convert JSON format into data frame
slice(genredf)
1.1. Wordcloud: Genre Representation
#A look at the genre variety in our dataset
df <- as.data.frame(table(genredf$genre))
df2 <- with(df,df[order(Freq,decreasing = TRUE),])
#wordcloud
wordcloud(words=df2$Var1,freq=df2$Freq,min.freq=100,max.words = 20,random.order=FALSE,random.color=TRUE,rot.per=0.35,colors = brewer.pal(20,"Dark2"),scale=c(5,.2))
n too large, allowed maximum for palette Dark2 is 8
Returning the palette you asked for with that many colors

1.2. Wordcloud: Production Companies
production=movie %>% filter(nchar(production_companies)>2) %>% mutate(js=lapply(production_companies,fromJSON)) %>% unnest(js) %>% dplyr::select(budget,revenue,company=name)
lapply(production,class)
$budget
[1] "integer"

$revenue
[1] "numeric"

$company
[1] "character"
df <- as.data.frame(table(production$company))
df2 <- with(df,df[order(Freq,decreasing = TRUE),])
df2
wordcloud(words=df2$Var1,freq=df2$Freq,min.freq=50,max.words = 25,color =rainbow(7),scale=c(3,0.5))

1.3. Which year has seen maximum release of movies ?
movie$Year=as.factor(format(movie$release_date,"%Y"))
movie$Date=as.factor(format(movie$release_date,"%d"))
movie$month=month.abb[(as.factor(format(movie$release_date,"%m")))]
df <- as.data.frame(table(movie$month))
df2 <- with(df,df[order(Freq,decreasing = TRUE),])
df2
df2 %>%
ggplot(aes(reorder(Var1,Freq),Freq,fill=Var1))+geom_bar(stat="identity")+theme(plot.title=element_text(size=14,face="italic",colour="red"),axis.text.x = element_text(angle=90),legend.position="none")+labs(x="",y="Total number of movies released",title="Number of Movies Releases per month")+coord_flip()+geom_label(aes(label=Freq))

1.4. Does high budget movie necessarily mean high popularity among viewers?
db=movie %>% left_join(credit,by=c("id"="movie_id"))
db_credit=db %>% filter(nchar(cast)>2) %>% mutate(js=lapply(cast,fromJSON)) %>% unnest(js)
get_cor <- function(df){
    m <- cor(df$x,df$y, use="pairwise.complete.obs");
    eq <- substitute(expr=r==cor,env=list(cor=format(m, digits = 4)))
    return(as.character(as.expression(eq) ))                
}
temp=db_credit %>%  dplyr::select(budget,popularity) %>% distinct() 
ggplot(temp,aes(budget,popularity))+stat_bin_hex(bins=15)+scale_fill_distiller(palette="Spectral")+stat_smooth(method="lm",color="orchid",size=2)+scale_x_continuous(labels=scales::comma)

temp=db_credit %>%  dplyr::select(budget,vote_count) %>% distinct() 
ggplot(temp,aes(budget,vote_count))+stat_bin_hex(bins=15)+scale_fill_distiller(palette="Spectral")+stat_smooth(method="lm",color="orchid",size=2)+scale_x_continuous(labels=scales::comma)

temp=db_credit %>%  dplyr::select(budget,revenue) %>% distinct() 
ggplot(temp,aes(budget,revenue))+stat_bin_hex(bins=15)+scale_fill_distiller(palette="Spectral")+stat_smooth(method="lm",color="orchid",size=2)+scale_x_continuous(labels=scales::comma)

From the above plots we could realise that even when production companies spend lot of money on the movie it will not yield the revenue unless important features like story and direction make an impact to the audience.

1.5. Comparing Revenue with vote_count, popularity and runtime
ggplot(db_credit, aes(x =runtime, y =revenue))+
  geom_point(size=1, aes(colour=vote_count)) +
  labs(title = "Runtime Vs. Revenue and vote count", 
       x = "Runtime", y = "Revenue")

ggplot(db_credit, aes(x =popularity, y =revenue))+
  geom_point(size=1, aes(colour=vote_count)) +
  labs(title = "Popularity Vs. Revenue and vote count", 
       x = "popularity", y = "Revenue")

ggplot(aes(x = log(revenue), y = log(budget)), data = db_credit) +
  geom_point(alpha = 0.1, position = position_jitter(h = 0)) +
  geom_smooth(method = 'lm', color = 'red') +
  ylab('Budget') +
  xlab('Revenue') +
  ggtitle('Budget Vs Revenue')

ggplot(aes(x = log(revenue), y = vote_average), data = db_credit) +
  geom_point(alpha = 0.1, position = position_jitter(h = 0)) +
  geom_smooth(method = 'lm', color = 'red') +
  ylab('TMDB score') +
  xlab('Revenue') +
  ggtitle('TMDB score vs Revenue')

ggplot(aes(x = log(budget), y = vote_average), data = db_credit) +
  geom_point(alpha = 0.1, position = position_jitter(h = 0)) +
  geom_smooth(method = 'lm', color = 'red') +
  ylab('TMDB score') +
  xlab('Budget') +
  ggtitle('TMDB score vs Budget')

1.6. Average Movie Rating
ggplot(db_credit,aes(vote_average)) +
  geom_histogram(bins = 100) +
  geom_vline(xintercept = mean(tmdb$vote_average,na.rm = TRUE),colour = "red") + 
  ylab("Count of Movies") + 
  xlab("Average Vote") + 
  ggtitle("Histogram for average vote rating")

Mean : 6.092083

Let us look at the top 20 movies with highest average_vote with color according to vote count.

Here, movies with vote count > 500 are considered as movies with less vote counts and high rating can be a misleading statistic.

head(db_credit)
1.7. Trasformation of JSON column to unique columns to the movie dataset
## transformation of "keywords" column into tibble
keywords <- movie %>%    
  filter(nchar(keywords) > 2) %>%                 # fiter out blank keywords field
  mutate(                                         # create a new field 
    js = lapply(keywords, fromJSON)               # containing a LIST of keyword and value pairs
  ) %>%                                           # called id and name
  unnest(js) %>%                                  # turn each keyword/value pairs in the LIST into a row
  dplyr::select(id, title, keywords = name)
## Combining the keywords of a movie in a single column
keywords <- aggregate(keywords ~.,data = keywords, paste, collapse = ",")
#Combining the genres of a movie in a single column
genres <- movie %>% filter(nchar(genres) > 2) %>%                   
  mutate( js = lapply(genres, fromJSON)) %>%                                           
  unnest(js) %>%                                  
  dplyr::select(id, title, genres = name) 
genres <- aggregate(genres ~.,data = genres, paste, collapse = ",")
# Combining production_companies
production_companies <- movie %>% filter(nchar(production_companies) > 2) %>%                   
  mutate( js = lapply(production_companies, fromJSON)) %>%                                           
  unnest(js) %>%                                  
  dplyr::select(id, title, production_companies = name) 
production_companies <- aggregate(production_companies ~.,data = production_companies, paste, collapse = ",")
# Combining production countries
production_countries <- movie %>%    
  filter(nchar(production_countries) > 2) %>%     
  mutate(                                         
    js = lapply(production_countries, fromJSON)   
  ) %>%                                          
  unnest(js) %>%                                  
  dplyr::select(id, title, production_countries = name)
countries <- movie %>%    
  filter(nchar(production_countries) > 2) %>%     
  mutate(                                         
    js = lapply(production_countries, fromJSON)   
  ) %>%                                          
  unnest(js) %>%                                  
  dplyr::select(id, title, production_countries = name)
production_countries <- aggregate(production_countries ~.,data = production_countries, paste, collapse = ",")
# combining spoken languages
spoken_languages <- movie %>%    
  filter(nchar(spoken_languages) > 2) %>%        
  mutate(                                         
    js = lapply(spoken_languages, fromJSON)      
  ) %>%                                          
  unnest(js) %>%                                 
  dplyr::select(id, title, spoken_languages = iso_639_1) 
spoken_languages <- aggregate(spoken_languages ~.,data = spoken_languages, paste, collapse = ",")
movies <- subset(movie, select = -c(genres, keywords, production_companies, production_countries,spoken_languages))
glimpse(movies)
Observations: 4,803
Variables: 18
$ budget            <int> 237000000, 300000000, 245000000, 250000000, 260000000, 258000000, 260000000, 280000000, 250000000, 25000...
$ homepage          <chr> "http://www.avatarmovie.com/", "http://disney.go.com/disneypictures/pirates/", "http://www.sonypictures....
$ id                <int> 19995, 285, 206647, 49026, 49529, 559, 38757, 99861, 767, 209112, 1452, 10764, 58, 57201, 49521, 2454, 2...
$ original_language <chr> "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "e...
$ original_title    <chr> "Avatar", "Pirates of the Caribbean: At World's End", "Spectre", "The Dark Knight Rises", "John Carter",...
$ overview          <chr> "In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but bec...
$ popularity        <dbl> 150.437577, 139.082615, 107.376788, 112.312950, 43.926995, 115.699814, 48.681969, 134.279229, 98.885637,...
$ release_date      <date> 2009-12-10, 2007-05-19, 2015-10-26, 2012-07-16, 2012-03-07, 2007-05-01, 2010-11-24, 2015-04-22, 2009-07...
$ revenue           <dbl> 2787965087, 961000000, 880674609, 1084939099, 284139100, 890871626, 591794936, 1405403694, 933959197, 87...
$ runtime           <int> 162, 169, 148, 165, 132, 139, 100, 141, 153, 151, 154, 106, 151, 149, 143, 150, 143, 136, 106, 144, 136,...
$ status            <chr> "Released", "Released", "Released", "Released", "Released", "Released", "Released", "Released", "Release...
$ tagline           <chr> "Enter the World of Pandora.", "At the end of the world, the adventure begins.", "A Plan No One Escapes"...
$ title             <chr> "Avatar", "Pirates of the Caribbean: At World's End", "Spectre", "The Dark Knight Rises", "John Carter",...
$ vote_average      <dbl> 7.2, 6.9, 6.3, 7.6, 6.1, 5.9, 7.4, 7.3, 7.4, 5.7, 5.4, 6.1, 7.0, 5.9, 6.5, 6.3, 7.4, 6.4, 6.2, 7.1, 6.5,...
$ vote_count        <int> 11800, 4500, 4466, 9106, 2124, 3576, 3330, 6767, 5293, 7004, 1400, 2965, 5246, 2311, 6359, 1630, 11776, ...
$ Year              <fct> 2009, 2007, 2015, 2012, 2012, 2007, 2010, 2015, 2009, 2016, 2006, 2008, 2006, 2013, 2013, 2008, 2012, 20...
$ Date              <fct> 10, 19, 26, 16, 07, 01, 24, 22, 07, 23, 28, 30, 20, 03, 12, 15, 25, 14, 23, 10, 27, 12, 11, 04, 14, 18, ...
$ month             <chr> "Dec", "May", "Oct", "Jul", "Mar", "May", "Nov", "Apr", "Jul", "Mar", "Jun", "Oct", "Jun", "Jul", "Jun",...
# Dropped existing unformatted columns in the main dataset, creating a new dataset "movies"
movies <- subset(movie, select = -c(genres, keywords, production_companies, production_countries, spoken_languages))
movies <- movies %>%
  full_join(keywords, by = c("id", "title")) %>%
  full_join(genres, by = c("id", "title")) %>%
  full_join(production_companies, by = c("id", "title")) %>%
  full_join(production_countries, by = c("id", "title")) %>%
  full_join(spoken_languages, by = c("id", "title"))
glimpse(movies)
Observations: 4,803
Variables: 23
$ budget               <int> 237000000, 300000000, 245000000, 250000000, 260000000, 258000000, 260000000, 280000000, 250000000, 25...
$ homepage             <chr> "http://www.avatarmovie.com/", "http://disney.go.com/disneypictures/pirates/", "http://www.sonypictur...
$ id                   <int> 19995, 285, 206647, 49026, 49529, 559, 38757, 99861, 767, 209112, 1452, 10764, 58, 57201, 49521, 2454...
$ original_language    <chr> "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en", "en",...
$ original_title       <chr> "Avatar", "Pirates of the Caribbean: At World's End", "Spectre", "The Dark Knight Rises", "John Carte...
$ overview             <chr> "In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but ...
$ popularity           <dbl> 150.437577, 139.082615, 107.376788, 112.312950, 43.926995, 115.699814, 48.681969, 134.279229, 98.8856...
$ release_date         <date> 2009-12-10, 2007-05-19, 2015-10-26, 2012-07-16, 2012-03-07, 2007-05-01, 2010-11-24, 2015-04-22, 2009...
$ revenue              <dbl> 2787965087, 961000000, 880674609, 1084939099, 284139100, 890871626, 591794936, 1405403694, 933959197,...
$ runtime              <int> 162, 169, 148, 165, 132, 139, 100, 141, 153, 151, 154, 106, 151, 149, 143, 150, 143, 136, 106, 144, 1...
$ status               <chr> "Released", "Released", "Released", "Released", "Released", "Released", "Released", "Released", "Rele...
$ tagline              <chr> "Enter the World of Pandora.", "At the end of the world, the adventure begins.", "A Plan No One Escap...
$ title                <chr> "Avatar", "Pirates of the Caribbean: At World's End", "Spectre", "The Dark Knight Rises", "John Carte...
$ vote_average         <dbl> 7.2, 6.9, 6.3, 7.6, 6.1, 5.9, 7.4, 7.3, 7.4, 5.7, 5.4, 6.1, 7.0, 5.9, 6.5, 6.3, 7.4, 6.4, 6.2, 7.1, 6...
$ vote_count           <int> 11800, 4500, 4466, 9106, 2124, 3576, 3330, 6767, 5293, 7004, 1400, 2965, 5246, 2311, 6359, 1630, 1177...
$ Year                 <fct> 2009, 2007, 2015, 2012, 2012, 2007, 2010, 2015, 2009, 2016, 2006, 2008, 2006, 2013, 2013, 2008, 2012,...
$ Date                 <fct> 10, 19, 26, 16, 07, 01, 24, 22, 07, 23, 28, 30, 20, 03, 12, 15, 25, 14, 23, 10, 27, 12, 11, 04, 14, 1...
$ month                <chr> "Dec", "May", "Oct", "Jul", "Mar", "May", "Nov", "Apr", "Jul", "Mar", "Jun", "Oct", "Jun", "Jul", "Ju...
$ keywords             <chr> "culture clash,future,space war,space colony,society,space travel,futuristic,romance,space,alien,trib...
$ genres               <chr> "Action,Adventure,Fantasy,Science Fiction", "Adventure,Fantasy,Action", "Action,Adventure,Crime", "Ac...
$ production_companies <chr> "Ingenious Film Partners,Twentieth Century Fox Film Corporation,Dune Entertainment,Lightstorm Enterta...
$ production_countries <chr> "United States of America,United Kingdom", "United States of America", "United Kingdom,United States ...
$ spoken_languages     <chr> "en,es", "en", "fr,en,es,it,de", "en", "en", "en,fr", "en", "en", "en", "en", "en,fr,de", "en,es,it,f...
1.8. Movie Rating Vs Budget
movies %>% dplyr::select(title,vote_average,vote_count, budget) %>% 
  filter(vote_count > 500 ) %>% arrange(desc(vote_average)) %>% head(20) %>%
  ggplot(aes(x = title,y = vote_average,fill = budget )) + geom_bar(stat = "identity") + coord_flip(ylim = c(7, 9)) +
  scale_fill_continuous()

1.9. Popularity vs Budget are not highly correlated
#Top 20 movies by popularity, color according to vote count :
movies %>% dplyr::select(title,vote_average,vote_count, popularity) %>% 
  filter(vote_count > 300 ) %>%  head(30) %>%
  ggplot(aes(x = title,y = popularity, fill = vote_count)) + geom_bar(stat = "identity") + coord_flip() +
  scale_fill_continuous()

1.10. Analysing Movie Genre
genre1 <- Corpus(VectorSource(genres$genres))
dtm <- DocumentTermMatrix(genre1)
genre_freq <- colSums(as.matrix(dtm))
freq <- sort(colSums(as.matrix(dtm)), decreasing = TRUE) 
genre_wframe <- data.frame(word = names(genre_freq), freq = genre_freq)
ggplot(genre_wframe, aes(x = reorder(word,-freq), y = freq)) +  
  geom_bar(stat = "identity") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Movie Genre frequency graph") + 
  xlab("Genre") + 
  ylab("Frequency")

1.11. Compare Profit with the movie rating and budget
#Adding new columns gross and gross_flag 
movies <- movies %>%
  mutate(gross = revenue - budget, gross_flag = ifelse(gross < 0, "Loss", "Profit"))
library(plotly)

Attaching package: ‘plotly’

The following object is masked from ‘package:MASS’:

    select

The following object is masked from ‘package:formattable’:

    style

The following object is masked from ‘package:ggplot2’:

    last_plot

The following objects are masked from ‘package:plyr’:

    arrange, mutate, rename, summarise

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout
plot_ly(movies, x = ~vote_average, y = ~budget, z = ~gross/1000000, 
        color = ~gross_flag, colors = c('#BF382A', '#0C4B8E') ,size = I(3)) %>%
  add_markers() %>%
  layout(scene = list(xaxis = list(title = 'Average vote'),
                      yaxis = list(title = 'Budget'),
                      zaxis = list(title = 'Gross (million $)')),
         title = "INTERACTIVE 3D Scatter plot: Average vote vs Budget vs Gross",
         showlegend = FALSE)

The blue dots represents the movie with movies making profits, we can hover over the plot to undersatnd the behaviour of the variables vote_average anf budget with respect to Gross.

2. Data Cleaning

movies <- read.csv("tmdb_5000_movies.csv", header = TRUE, stringsAsFactors = FALSE)
credits <- read.csv("tmdb_5000_credits.csv",header = TRUE, stringsAsFactors = FALSE )
library(tidyverse)
library(jsonlite)
#movies <- tmdb_5000_movies
#credits <- tmdb_5000_credits
Top.100.Directors.Working.Today <- read.csv("Top 100 Directors Working Today.csv", header = TRUE, stringsAsFactors = FALSE)
The.Top.100.Actors.of.2016 <- read.csv("The Top 100 Actors of 2016.csv",header = TRUE, stringsAsFactors = FALSE )
top100directors <- Top.100.Directors.Working.Today$Name
top100directors <- paste(top100directors)
top100actors <- The.Top.100.Actors.of.2016$Name
top100actors <- paste(top100actors)
# from: http://www.imdb.com/list/ls072596173/
#top100directors <- Top.100.Directors.Working.Today$Name
#top100directors <- paste(top100directors)
#top100actors <- The.Top.100.Actors.of.2016$Name
#top100actors <- paste(top100actors)
# Drop useless columns
movies <- movies[,!(names(movies) %in% c('homepage','overview','status',
                                           'title','tagline','original_title'))]
credits <- credits[,!(names(credits) %in% c('title'))]
dim(movies)
[1] 4803   14

2.1. Let’s clean movies dataset

2.1.1. Distinct Months; Holiday month
# Extract month of release date; put into new column
movies$release_date <- as.POSIXlt(movies$release_date, format="%Y-%m-%d")
movies$release_month <- lubridate::month(movies$release_date)
# From Linear Regression using each of 12 months as dummy categories,
# we saw that months 5, 6, 11 and 12 are important,
# while the rest are unimportant.
# We will aggregate this as 'holiday month' (beginning of Summer; beginning of Winter)
movies$holiday_month <- 0
movies[which(movies$release_month %in% c(5,6,11,12)),]['holiday_month'] <- 1
#drop release date and month
movies <- movies[ , !(names(movies) %in% c('release_date','release_month'))]
2.1.2. Number of Languages; English / No english
# count number of languages
movies$num_lang <- stringr::str_count(movies$spoken_languages, "\"name\":")
movies$original_isEnglish <- 0
movies[which(movies$original_language == 'en'),]['original_isEnglish'] <- 1
# drop original lanugage; spoken languages
movies <- movies[ , !(names(movies) %in% c('original_language', 'spoken_languages'))]
2.1.3. Distinct Production Countries
# turn JSON into DF with one country per row
countryDF = movies %>% filter(nchar(as.character(production_countries))>2) %>% mutate(js=lapply(as.character(production_countries),fromJSON)) %>% unnest(js) %>% dplyr::select(id, production_countries=name)
# Select Unique Countries
# Distinct_Countries = countryDF %>% distinct(production_countries)
df <- as.data.frame(table(countryDF$production_countries))
head(with(df,df[order(Freq,decreasing = TRUE),]))

We will make dummy variables on top 6 frequent countries on the list.

# United States
movies$country_USA <- 0
movies$country_UK <- 0
movies$country_Germany <- 0
movies$country_France <- 0
movies$country_Canada <- 0
movies$country_Austr <- 0
for (i in 1:nrow(movies)) {
  noQuotes <- strsplit(toString(movies$production_countries[i]),'\"')
  noQuotes <- unlist(noQuotes)
  
  if (any(noQuotes == "United States of America")) {
    movies$country_USA[i] <- 1
  }
  if (any(noQuotes == "United Kingdom")) {
    movies$country_UK[i] <- 1
  }
  if (any(noQuotes == "Germany")) {
    movies$country_Germany[i] <- 1
  }
  if (any(noQuotes == "France")) {
    movies$country_France[i] <- 1
  }
  if (any(noQuotes == "Canada")) {
    movies$country_Canada[i] <- 1
  }
  if (any(noQuotes == "Australia")) {
    movies$country_Austr[i] <- 1
  }
}
# drop production countries column
movies <- movies[,!(names(movies) %in% c('production_countries'))]
# Check correct numbers below.
# Should match with frequencies from
# head(with(df,df[order(Freq,decreasing = TRUE),]))
as.data.frame(table(movies$country_UK))
2.1.4. Distinct Genres
# turn JSON into DF with one genre per row
genreDF= movies %>% filter(nchar(as.character(genres))>2) %>% mutate(js=lapply(as.character(genres),fromJSON)) %>% unnest(js) %>% dplyr::select(id, genre=name)
# slice(genredf) # View the newly created DF
# Select Unique Genres
Distinct_Genres = unique(genreDF$genre)
  
# Make dummy variable for each genre
for (name in Distinct_Genres) {
  
  # prepend 'genre' to all genre names
  col_Name <- paste('genre', name, sep='_')
  movies[col_Name] <- 0
  movies[grep(name,movies$genres),][col_Name] <- 1
}
# Drop raw json genres
movies <- movies[,!(names(movies) %in% c('genres'))]
2.1.5. Distinct Keywords
# turn JSON into DF with one keyword per row
keywordDF=movies %>% filter(nchar(as.character(keywords))>2) %>% mutate(js=lapply(as.character(keywords),fromJSON)) %>% unnest(js) %>% dplyr::select(id,keywords=name)
# Select Unique Genres
# Distinct_Keywords = unique(keywordDF$keywords)
# length(Distinct_Keywords)
df <- as.data.frame(table(keywordDF$keywords))
with(df,df[order(Freq,decreasing = TRUE),])
2.1.6. Distinct Production Companies
# turn JSON into DF with one company per row
companyDF=movies %>% filter(nchar(as.character(production_companies))>2) %>% mutate(js=lapply(as.character(production_companies),fromJSON)) %>% unnest(js) %>% dplyr::select(id,production_companies=name)
# Select Unique Genres
# Distinct_Companies = companyDF %>% distinct(production_companies)
df <- as.data.frame(table(companyDF$production_companies))
with(df,df[order(Freq,decreasing = TRUE),])

2.2. Lets Clean Credits Dataset

# Split JSON for cast
all_cast <- credits %>%      # start with the raw tibble 
  filter(nchar(as.character(cast)) > 2) %>%        # filter out movies with empty cast 
  mutate(                          #       
    js_cast  =  lapply(as.character(cast), fromJSON)  # turn the JSON into a list
  )  %>%                           #
  unnest(js_cast) %>% dplyr::select(movie_id, cast_id, character, credit_id, gender, id, name = name, order)
# Split JSON for crew
all_crew <- credits %>%      # start with the raw tibble 
  filter(nchar(as.character(crew)) > 2) %>%        # filter out movies with empty crew  
  mutate(                                 
    js_crew  =  lapply(as.character(crew), fromJSON)  # turn the JSON into a list
  )  %>%                           #
  unnest(js_crew) %>% dplyr::select(movie_id, credit_id, department, gender, id, job, name = name)
2.2.1. Check if Director is in top 100
# List all movie directors
directors <- filter(all_crew[,c('movie_id','job','name')], all_crew$job == 'Director')
directors
# List of top 100 directors
with(Top.100.Directors.Working.Today, Top.100.Directors.Working.Today[order(Name),])['Name']

Make a column to represent if the movie has top director.

# New binary column to aggregate by director fame
credits$topDirector <- 0
# If director in top 100 directors list, change from 0 to 1
for (i in 1:nrow(directors)) {
  if (directors$name[i] %in% top100directors) {       # match director and credits DFs by ID
    credits[which(credits$movie_id == directors$movie_id[i]),]['topDirector'] <- 1
  }
}
# Drop crew column
credits <- credits[,!(names(credits) %in% c('crew'))]
credits
# Check value frequencies
as.data.frame(table(credits$topDirector))
2.2.2. Check if any top 100 Actors
# List all movie directors
actors <- filter(all_cast[,c('movie_id','name')])
actors

Make a column to represent if the movie has top actor.

# New binary column to aggregate by director fame
credits$topActor <- 0
# If Actors in top 100 actors list, change from 0 to 1
for (i in 1:nrow(actors)) {
  if (actors$name[i] %in% top100actors) {       # match actors and credits DFs by ID
    credits[which(credits$movie_id == actors$movie_id[i]),]['topActor'] <- 1
  }
}
head(credits$topActor)
[1] 0 1 1 1 1 1
2.2.3. Average Genders of Cast
# 0 = unknown
# 1 = Female
# 2 = Male
CastGenders <- filter(all_cast[,c('movie_id','gender')],
                  all_cast$gender != 0)
CastGenders <- aggregate(CastGenders$gender, by=list(CastGenders$movie_id), FUN=mean)
# Append Avg Genders to Credits
credits$CastGenderAVG <- 1.5
for (ID in CastGenders$Group.1){
  credits[which(credits$movie_id == ID),]['CastGenderAVG'] <- 
    CastGenders[which(CastGenders$Group.1 == ID),]['x']
}
# Drop cast column
credits <- credits[,!(names(credits) %in% c('cast'))]
2.2.4. Merge movie and credit dataset
TMDB_Cleaned <- left_join(credits,movies, by = c('movie_id' = 'id'))
colnames(TMDB_Cleaned)
 [1] "movie_id"              "topDirector"           "topActor"              "CastGenderAVG"         "budget"               
 [6] "keywords"              "popularity"            "production_companies"  "revenue"               "runtime"              
[11] "vote_average"          "vote_count"            "holiday_month"         "num_lang"              "original_isEnglish"   
[16] "country_USA"           "country_UK"            "country_Germany"       "country_France"        "country_Canada"       
[21] "country_Austr"         "genre_Action"          "genre_Adventure"       "genre_Fantasy"         "genre_Science Fiction"
[26] "genre_Crime"           "genre_Drama"           "genre_Thriller"        "genre_Animation"       "genre_Family"         
[31] "genre_Western"         "genre_Comedy"          "genre_Romance"         "genre_Horror"          "genre_Mystery"        
[36] "genre_History"         "genre_War"             "genre_Music"           "genre_Documentary"     "genre_Foreign"        
[41] "genre_TV Movie"       
dim(TMDB_Cleaned)
[1] 4803   41
2.2.5. Remove keywords and production_companies from the dataset as they are huge and is less significant to predict the revenue
# remove this after cleaning keywords and companies properly
TMDB_Cleaned <- TMDB_Cleaned[,!(names(TMDB_Cleaned) %in% c('keywords','production_companies'))]
TMDB_Cleaned <- na.omit(TMDB_Cleaned)
dim(TMDB_Cleaned)
[1] 4801   39
write.csv(TMDB_Cleaned, file='TMDB_Cleaned.csv')
tmdb <- read.csv("TMDB_Cleaned.csv", header = TRUE, stringsAsFactors = FALSE)
tmdb1<-subset(tmdb,select=-c(X,movie_id))
head(tmdb1)
2.2.6. Concise num_lang to 4 spoken languages in a movie for better analysis
tmdb1 <- tmdb1 %>% mutate(num_lang = ifelse(num_lang >= 5, 4, num_lang))
df <- as.data.frame(table(tmdb1$num_lang))
head(with(df,df[order(Freq,decreasing = TRUE),]))
head(tmdb1)
2.2.7. Converting binary columns to categorical variables
tmdb1$original_isEnglish<- as.factor(tmdb1$original_isEnglish)
tmdb1$holiday_month<- as.factor(tmdb1$holiday_month)
tmdb1$topDirector<- as.factor(tmdb1$topDirector)
tmdb1$topActor <- as.factor(tmdb1$topActor)
tmdb1$num_lang <- as.factor(tmdb1$num_lang)
tmdb1$genre_Crime <- as.factor(tmdb1$genre_Crime)
tmdb1$genre_Science.Fiction <- as.factor(tmdb1$genre_Science.Fiction)
tmdb1$genre_Family<- as.factor(tmdb1$genre_Family)
tmdb1$genre_Animation <- as.factor(tmdb1$genre_Animation)
tmdb1$genre_Western <- as.factor(tmdb1$genre_Western)
tmdb1$genre_Adventure <- as.factor(tmdb1$genre_Adventure)
tmdb1$genre_Romance <- as.factor(tmdb1$genre_Romance)
tmdb1$genre_Drama <- as.factor(tmdb1$genre_Drama)
tmdb1$genre_Action <- as.factor(tmdb1$genre_Action)
tmdb1$genre_Documentary <- as.factor(tmdb1$genre_Documentary)
tmdb1$genre_Comedy <- as.factor(tmdb1$genre_Comedy)
tmdb1$genre_Horror <- as.factor(tmdb1$genre_Horror)
tmdb1$genre_Mystery <- as.factor(tmdb1$genre_Mystery)
tmdb1$genre_History <- as.factor(tmdb1$genre_History)
tmdb1$genre_Music <- as.factor(tmdb1$genre_History)
tmdb1$genre_Foreign <- as.factor(tmdb1$genre_Foreign)
tmdb1$genre_TV.Movie <- as.factor(tmdb1$genre_TV.Movie)
tmdb1$genre_Fantasy <- as.factor(tmdb1$genre_Fantasy)
tmdb1$genre_Thriller <- as.factor(tmdb1$genre_Thriller)
tmdb1$genre_War <- as.factor(tmdb1$genre_War)
tmdb1$country_USA <- as.factor(tmdb1$country_USA)
tmdb1$country_UK <- as.factor(tmdb1$country_UK)
tmdb1$country_France <- as.factor(tmdb1$country_France)
tmdb1$country_Germany <- as.factor(tmdb1$country_Germany)
tmdb1$country_Canada <- as.factor(tmdb1$country_Canada)
tmdb1$country_Austr <- as.factor(tmdb1$country_Austr)
#tmdb2$revenue <- log(tmdb2$revenue)
#tmdb2$budget <- log(tmdb2$budget)
str(tmdb1)
'data.frame':   4801 obs. of  38 variables:
 $ topDirector          : Factor w/ 2 levels "0","1": 2 1 1 2 1 1 1 2 1 1 ...
 $ topActor             : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 1 2 1 2 ...
 $ CastGenderAVG        : num  1.62 1.83 1.81 1.87 1.76 ...
 $ budget               : int  237000000 300000000 245000000 250000000 260000000 258000000 260000000 280000000 250000000 250000000 ...
 $ popularity           : num  150.4 139.1 107.4 112.3 43.9 ...
 $ revenue              : num  2.79e+09 9.61e+08 8.81e+08 1.08e+09 2.84e+08 ...
 $ runtime              : int  162 169 148 165 132 139 100 141 153 151 ...
 $ vote_average         : num  7.2 6.9 6.3 7.6 6.1 5.9 7.4 7.3 7.4 5.7 ...
 $ vote_count           : int  11800 4500 4466 9106 2124 3576 3330 6767 5293 7004 ...
 $ holiday_month        : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 2 1 1 1 ...
 $ num_lang             : Factor w/ 5 levels "0","1","2","3",..: 3 2 5 2 2 3 2 2 2 2 ...
 $ original_isEnglish   : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ country_USA          : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ country_UK           : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 1 2 1 ...
 $ country_Germany      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ country_France       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ country_Canada       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ country_Austr        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Action         : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 1 2 ...
 $ genre_Adventure      : Factor w/ 2 levels "0","1": 2 2 2 1 2 2 1 2 2 2 ...
 $ genre_Fantasy        : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 1 1 2 2 ...
 $ genre_Science.Fiction: Factor w/ 2 levels "0","1": 2 1 1 1 2 1 1 2 1 1 ...
 $ genre_Crime          : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 1 1 1 ...
 $ genre_Drama          : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 1 1 ...
 $ genre_Thriller       : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 1 1 ...
 $ genre_Animation      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
 $ genre_Family         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 2 1 ...
 $ genre_Western        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Comedy         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Romance        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Horror         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Mystery        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_History        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_War            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Music          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Documentary    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Foreign        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_TV.Movie       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
2.2.8. Scale the data for uniformity in numerical values of the dataset
library(MASS)
ind <- sapply(tmdb1, is.numeric)
tmdb1[ind] <- lapply(tmdb1[ind], scale)
str(tmdb1)
'data.frame':   4801 obs. of  38 variables:
 $ topDirector          : Factor w/ 2 levels "0","1": 2 1 1 2 1 1 1 2 1 1 ...
 $ topActor             : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 1 2 1 2 ...
 $ CastGenderAVG        : num [1:4801, 1] -0.215 0.946 0.81 1.145 0.538 ...
  ..- attr(*, "scaled:center")= num 1.66
  ..- attr(*, "scaled:scale")= num 0.179
 $ budget               : num [1:4801, 1] 5.11 6.65 5.3 5.42 5.67 ...
  ..- attr(*, "scaled:center")= num 29054015
  ..- attr(*, "scaled:scale")= num 40728211
 $ popularity           : num [1:4801, 1] 4.052 3.695 2.699 2.854 0.705 ...
  ..- attr(*, "scaled:center")= num 21.5
  ..- attr(*, "scaled:scale")= num 31.8
 $ revenue              : num [1:4801, 1] 16.61 5.39 4.9 6.16 1.24 ...
  ..- attr(*, "scaled:center")= num 82294907
  ..- attr(*, "scaled:scale")= num 1.63e+08
 $ runtime              : num [1:4801, 1] 2.44 2.75 1.82 2.57 1.11 ...
  ..- attr(*, "scaled:center")= num 107
  ..- attr(*, "scaled:scale")= num 22.6
 $ vote_average         : num [1:4801, 1] 0.92893 0.67714 0.17357 1.26464 0.00572 ...
  ..- attr(*, "scaled:center")= num 6.09
  ..- attr(*, "scaled:scale")= num 1.19
 $ vote_count           : num [1:4801, 1] 9 3.09 3.06 6.82 1.16 ...
  ..- attr(*, "scaled:center")= num 691
  ..- attr(*, "scaled:scale")= num 1235
 $ holiday_month        : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 2 1 1 1 ...
 $ num_lang             : Factor w/ 5 levels "0","1","2","3",..: 3 2 5 2 2 3 2 2 2 2 ...
 $ original_isEnglish   : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ country_USA          : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ country_UK           : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 1 2 1 ...
 $ country_Germany      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ country_France       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ country_Canada       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ country_Austr        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Action         : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 1 2 ...
 $ genre_Adventure      : Factor w/ 2 levels "0","1": 2 2 2 1 2 2 1 2 2 2 ...
 $ genre_Fantasy        : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 1 1 2 2 ...
 $ genre_Science.Fiction: Factor w/ 2 levels "0","1": 2 1 1 1 2 1 1 2 1 1 ...
 $ genre_Crime          : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 1 1 1 ...
 $ genre_Drama          : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 1 1 ...
 $ genre_Thriller       : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 1 1 ...
 $ genre_Animation      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
 $ genre_Family         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 2 1 ...
 $ genre_Western        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Comedy         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Romance        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Horror         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Mystery        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_History        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_War            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Music          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Documentary    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_Foreign        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ genre_TV.Movie       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
2.2.9. Let’s explore data to look at our numerical values
summary(tmdb1)
 topDirector topActor  CastGenderAVG.V1        budget.V1         popularity.V1        revenue.V1          runtime.V1     
 0:4397      0:3296   Min.   :-3.696618   Min.   :-0.713363   Min.   :-0.675702   Min.   :-0.505241   Min.   :-4.726524  
 1: 404      1:1505   1st Qu.:-0.657740   1st Qu.:-0.693721   1st Qu.:-0.528620   1st Qu.:-0.505241   1st Qu.:-0.569428  
                      Median : 0.017566   Median :-0.345068   Median :-0.269413   Median :-0.387488   Median :-0.171408  
                      Mean   : 0.000000   Mean   : 0.000000   Mean   : 0.000000   Mean   : 0.000000   Mean   : 0.000000  
                      3rd Qu.: 0.692872   3rd Qu.: 0.268757   3rd Qu.: 0.215253   3rd Qu.: 0.065239   3rd Qu.: 0.491959  
                      Max.   : 1.874658   Max.   : 8.616779   Max.   :26.840682   Max.   :16.611191   Max.   :10.221334  
   vote_average.V1      vote_count.V1    holiday_month num_lang original_isEnglish country_USA country_UK country_Germany
 Min.   :-5.113911   Min.   :-0.559219   0:3269        0:  85   0: 297             0: 845      0:4166     0:4477         
 1st Qu.:-0.413925   1st Qu.:-0.515486   1:1532        1:3354   1:4504             1:3956      1: 635     1: 324         
 Median : 0.089645   Median :-0.368089                 2: 841                                                            
 Mean   : 0.000000   Mean   : 0.000000                 3: 308                                                            
 3rd Qu.: 0.593215   3rd Qu.: 0.037657                 4: 213                                                            
 Max.   : 3.278921   Max.   :10.578132                                                                                   
 country_France country_Canada country_Austr genre_Action genre_Adventure genre_Fantasy genre_Science.Fiction genre_Crime genre_Drama
 0:4495         0:4540         0:4691        0:3647       0:4011          0:4377        0:4266                0:4105      0:2505     
 1: 306         1: 261         1: 110        1:1154       1: 790          1: 424        1: 535                1: 696      1:2296     
                                                                                                                                     
                                                                                                                                     
                                                                                                                                     
                                                                                                                                     
 genre_Thriller genre_Animation genre_Family genre_Western genre_Comedy genre_Romance genre_Horror genre_Mystery genre_History
 0:3527         0:4567          0:4288       0:4719        0:3079       0:3907        0:4282       0:4453        0:4604       
 1:1274         1: 234          1: 513       1:  82        1:1722       1: 894        1: 519       1: 348        1: 197       
                                                                                                                              
                                                                                                                              
                                                                                                                              
                                                                                                                              
 genre_War genre_Music genre_Documentary genre_Foreign genre_TV.Movie
 0:4657    0:4604      0:4692            0:4767        0:4793        
 1: 144    1: 197      1: 109            1:  34        1:   8        
                                                                     
                                                                     
                                                                     
                                                                     
2.2.10. Investigate for missing values
sapply(tmdb1,function(x) sum(is.na(x))) # number of missing values for each variable 
          topDirector              topActor         CastGenderAVG                budget            popularity               revenue 
                    0                     0                     0                     0                     0                     0 
              runtime          vote_average            vote_count         holiday_month              num_lang    original_isEnglish 
                    0                     0                     0                     0                     0                     0 
          country_USA            country_UK       country_Germany        country_France        country_Canada         country_Austr 
                    0                     0                     0                     0                     0                     0 
         genre_Action       genre_Adventure         genre_Fantasy genre_Science.Fiction           genre_Crime           genre_Drama 
                    0                     0                     0                     0                     0                     0 
       genre_Thriller       genre_Animation          genre_Family         genre_Western          genre_Comedy         genre_Romance 
                    0                     0                     0                     0                     0                     0 
         genre_Horror         genre_Mystery         genre_History             genre_War           genre_Music     genre_Documentary 
                    0                     0                     0                     0                     0                     0 
        genre_Foreign        genre_TV.Movie 
                    0                     0 

3. Predictive modeling

Models used to build the predictive model- Linear regression Lasso regression Ridge Regression Regression Trees *Random Forest

Plot scatterplot on numerical variables
scatterplotMatrix(~tmdb1$revenue+tmdb1$budget+tmdb1$popularity+tmdb1$runtime+tmdb1$CastGenderAVG+tmdb1$vote_average+tmdb1$vote_count)

library(corrplot)
corrplot 0.84 loaded
numeric_col <- sapply(tmdb1, is.numeric)
tmdb1_numeric<- tmdb1[,numeric_col]
Correlation<-cor(tmdb1_numeric)
corrplot(Correlation, method = "color")

3.1. Linear Regression

3.1.1. Perfrom stepwise regression to identify the top predictors
null_model<-lm(revenue~1,data=tmdb1)
full_model<-lm(revenue~.,data=tmdb1)
step(null_model, scope = list(lower = null_model, upper = full_model), direction = "both")
Start:  AIC=1
revenue ~ 1

                        Df Sum of Sq    RSS     AIC
+ vote_count             1   2931.28 1868.7 -4526.1
+ budget                 1   2563.52 2236.5 -3663.6
+ popularity             1   1994.92 2805.1 -2576.0
+ genre_Adventure        1    569.13 4230.9  -602.9
+ runtime                1    302.63 4497.4  -309.7
+ topActor               1    255.71 4544.3  -259.8
+ genre_Fantasy          1    216.21 4583.8  -218.3
+ holiday_month          1    200.96 4599.0  -202.3
+ genre_Action           1    198.77 4601.2  -200.0
+ genre_Animation        1    190.66 4609.3  -191.6
+ vote_average           1    186.82 4613.2  -187.6
+ country_USA            1    160.60 4639.4  -160.4
+ genre_Drama            1    150.83 4649.2  -150.3
+ topDirector            1    139.81 4660.2  -138.9
+ genre_Family           1    138.73 4661.3  -137.8
+ genre_Science.Fiction  1    111.72 4688.3  -110.1
+ original_isEnglish     1     48.48 4751.5   -45.7
+ num_lang               4     52.80 4747.2   -44.1
+ CastGenderAVG          1     39.29 4760.7   -36.5
+ genre_Horror           1     32.93 4767.1   -30.1
+ genre_Documentary      1     22.01 4778.0   -19.1
+ genre_Romance          1     20.58 4779.4   -17.6
+ country_France         1     15.90 4784.1   -12.9
+ genre_Comedy           1     12.26 4787.7    -9.3
+ genre_Foreign          1      8.66 4791.3    -5.7
+ genre_Crime            1      8.00 4792.0    -5.0
+ country_Canada         1      6.43 4793.6    -3.4
+ genre_History          1      4.75 4795.2    -1.8
+ genre_Music            1      4.75 4795.2    -1.8
+ country_UK             1      4.19 4795.8    -1.2
+ genre_Western          1      4.09 4795.9    -1.1
+ genre_TV.Movie         1      2.05 4798.0     1.0
<none>                               4800.0     1.0
+ genre_Mystery          1      0.23 4799.8     2.8
+ country_Germany        1      0.17 4799.8     2.8
+ country_Austr          1      0.13 4799.9     2.9
+ genre_Thriller         1      0.10 4799.9     2.9
+ genre_War              1      0.02 4800.0     3.0

Step:  AIC=-4526.08
revenue ~ vote_count

                        Df Sum of Sq    RSS     AIC
+ budget                 1    529.04 1339.7 -6122.0
+ genre_Adventure        1     96.14 1772.6 -4777.7
+ genre_Family           1     73.64 1795.1 -4717.1
+ genre_Animation        1     69.48 1799.2 -4706.0
+ holiday_month          1     53.52 1815.2 -4663.6
+ genre_Fantasy          1     37.66 1831.1 -4621.8
+ genre_Drama            1     32.39 1836.3 -4608.0
+ topDirector            1     16.94 1851.8 -4567.8
+ popularity             1     16.32 1852.4 -4566.2
+ genre_Crime            1     15.60 1853.1 -4564.3
+ country_USA            1     12.24 1856.5 -4555.6
+ vote_average           1     12.08 1856.6 -4555.2
+ genre_Action           1     10.11 1858.6 -4550.1
+ runtime                1      7.71 1861.0 -4543.9
+ genre_Horror           1      7.63 1861.1 -4543.7
+ genre_Thriller         1      6.21 1862.5 -4540.1
+ country_France         1      5.25 1863.5 -4537.6
+ original_isEnglish     1      4.30 1864.4 -4535.1
+ genre_Comedy           1      3.73 1865.0 -4533.7
+ genre_Mystery          1      3.27 1865.5 -4532.5
+ genre_Western          1      2.38 1866.3 -4530.2
+ country_Canada         1      2.08 1866.6 -4529.4
+ country_Germany        1      0.87 1867.9 -4526.3
+ country_UK             1      0.82 1867.9 -4526.2
<none>                               1868.7 -4526.1
+ genre_Science.Fiction  1      0.75 1868.0 -4526.0
+ CastGenderAVG          1      0.47 1868.2 -4525.3
+ topActor               1      0.44 1868.3 -4525.2
+ genre_TV.Movie         1      0.32 1868.4 -4524.9
+ genre_Documentary      1      0.31 1868.4 -4524.9
+ genre_Foreign          1      0.18 1868.5 -4524.5
+ genre_Romance          1      0.17 1868.5 -4524.5
+ genre_War              1      0.14 1868.6 -4524.4
+ genre_History          1      0.11 1868.6 -4524.4
+ genre_Music            1      0.11 1868.6 -4524.4
+ country_Austr          1      0.00 1868.7 -4524.1
+ num_lang               4      1.86 1866.9 -4522.9
- vote_count             1   2931.28 4800.0     1.0

Step:  AIC=-6121.97
revenue ~ vote_count + budget

                        Df Sum of Sq    RSS     AIC
+ genre_Family           1     19.34 1320.3 -6189.8
+ holiday_month          1     17.76 1321.9 -6184.0
+ genre_Animation        1     17.39 1322.3 -6182.7
+ topActor               1     12.23 1327.5 -6164.0
+ topDirector            1     11.35 1328.3 -6160.8
+ genre_Crime            1     10.58 1329.1 -6158.0
+ genre_Thriller         1      9.26 1330.4 -6153.3
+ genre_Science.Fiction  1      9.13 1330.5 -6152.8
+ genre_Adventure        1      6.65 1333.0 -6143.9
+ genre_Action           1      5.91 1333.8 -6141.2
+ country_Germany        1      5.73 1334.0 -6140.6
+ genre_Comedy           1      4.83 1334.8 -6137.3
+ genre_Drama            1      4.59 1335.1 -6136.5
+ popularity             1      4.22 1335.5 -6135.1
+ CastGenderAVG          1      3.30 1336.4 -6131.8
+ genre_Romance          1      3.27 1336.4 -6131.7
+ genre_Mystery          1      2.97 1336.7 -6130.6
+ country_France         1      2.44 1337.2 -6128.7
+ genre_Western          1      2.28 1337.4 -6128.2
+ genre_Fantasy          1      1.63 1338.0 -6125.8
+ country_Austr          1      1.09 1338.6 -6123.9
+ genre_History          1      1.07 1338.6 -6123.8
+ genre_Music            1      1.07 1338.6 -6123.8
+ genre_War              1      0.96 1338.7 -6123.4
+ country_Canada         1      0.96 1338.7 -6123.4
+ genre_Documentary      1      0.95 1338.7 -6123.4
+ country_UK             1      0.59 1339.1 -6122.1
<none>                               1339.7 -6122.0
+ vote_average           1      0.46 1339.2 -6121.6
+ genre_Foreign          1      0.22 1339.5 -6120.8
+ runtime                1      0.20 1339.5 -6120.7
+ country_USA            1      0.17 1339.5 -6120.6
+ genre_Horror           1      0.02 1339.7 -6120.1
+ original_isEnglish     1      0.01 1339.7 -6120.0
+ genre_TV.Movie         1      0.00 1339.7 -6120.0
+ num_lang               4      1.52 1338.2 -6119.4
- budget                 1    529.04 1868.7 -4526.1
- vote_count             1    896.80 2236.5 -3663.6

Step:  AIC=-6189.8
revenue ~ vote_count + budget + genre_Family

                        Df Sum of Sq    RSS     AIC
+ holiday_month          1     16.80 1303.5 -6249.3
+ topDirector            1      9.77 1310.6 -6223.4
+ topActor               1      9.46 1310.9 -6222.3
+ genre_Science.Fiction  1      8.38 1312.0 -6218.4
+ genre_Crime            1      7.42 1312.9 -6214.8
+ genre_Animation        1      4.97 1315.4 -6205.9
+ country_Germany        1      4.79 1315.5 -6205.3
+ genre_Thriller         1      4.73 1315.6 -6205.0
+ genre_Romance          1      4.32 1316.0 -6203.5
+ popularity             1      3.86 1316.5 -6201.8
+ genre_Adventure        1      3.37 1317.0 -6200.1
+ CastGenderAVG          1      3.01 1317.3 -6198.8
+ genre_Action           1      3.01 1317.3 -6198.8
+ genre_Drama            1      2.41 1317.9 -6196.6
+ genre_Comedy           1      2.12 1318.2 -6195.5
+ country_France         1      2.11 1318.2 -6195.5
+ genre_Western          1      1.91 1318.4 -6194.7
+ genre_Mystery          1      1.85 1318.5 -6194.5
+ country_Austr          1      1.18 1319.2 -6192.1
+ genre_Documentary      1      1.08 1319.3 -6191.7
+ country_Canada         1      0.75 1319.6 -6190.5
<none>                               1320.3 -6189.8
+ genre_History          1      0.51 1319.8 -6189.7
+ genre_Music            1      0.51 1319.8 -6189.7
+ genre_War              1      0.47 1319.9 -6189.5
+ country_UK             1      0.42 1319.9 -6189.3
+ vote_average           1      0.36 1320.0 -6189.1
+ genre_Foreign          1      0.20 1320.1 -6188.5
+ runtime                1      0.20 1320.1 -6188.5
+ country_USA            1      0.18 1320.2 -6188.4
+ genre_Fantasy          1      0.13 1320.2 -6188.3
+ genre_Horror           1      0.07 1320.3 -6188.1
+ genre_TV.Movie         1      0.05 1320.3 -6188.0
+ original_isEnglish     1      0.02 1320.3 -6187.9
+ num_lang               4      1.21 1319.1 -6186.2
- genre_Family           1     19.34 1339.7 -6122.0
- budget                 1    474.75 1795.1 -4717.1
- vote_count             1    909.88 2230.2 -3675.1

Step:  AIC=-6249.27
revenue ~ vote_count + budget + genre_Family + holiday_month

                        Df Sum of Sq    RSS     AIC
+ topDirector            1     10.42 1293.1 -6285.8
+ topActor               1     10.27 1293.3 -6285.2
+ genre_Science.Fiction  1      8.21 1295.3 -6277.6
+ genre_Crime            1      6.52 1297.0 -6271.3
+ genre_Animation        1      4.66 1298.9 -6264.5
+ country_Germany        1      4.04 1299.5 -6262.2
+ genre_Romance          1      3.89 1299.7 -6261.6
+ genre_Thriller         1      3.64 1299.9 -6260.7
+ popularity             1      3.58 1300.0 -6260.5
+ CastGenderAVG          1      3.35 1300.2 -6259.6
+ genre_Drama            1      3.27 1300.3 -6259.3
+ genre_Adventure        1      3.18 1300.4 -6259.0
+ genre_Action           1      2.70 1300.8 -6257.2
+ genre_Comedy           1      2.35 1301.2 -6255.9
+ genre_Western          1      2.30 1301.2 -6255.8
+ country_France         1      2.20 1301.3 -6255.4
+ genre_Mystery          1      1.24 1302.3 -6251.8
+ genre_Documentary      1      1.14 1302.4 -6251.5
+ country_Austr          1      1.06 1302.5 -6251.2
+ genre_History          1      0.87 1302.7 -6250.5
+ genre_Music            1      0.87 1302.7 -6250.5
+ vote_average           1      0.77 1302.8 -6250.1
+ genre_War              1      0.72 1302.8 -6249.9
+ country_Canada         1      0.58 1303.0 -6249.4
<none>                               1303.5 -6249.3
+ country_UK             1      0.52 1303.0 -6249.2
+ genre_Foreign          1      0.28 1303.2 -6248.3
+ genre_Horror           1      0.27 1303.3 -6248.3
+ country_USA            1      0.16 1303.4 -6247.9
+ genre_Fantasy          1      0.08 1303.5 -6247.6
+ genre_TV.Movie         1      0.07 1303.5 -6247.5
+ original_isEnglish     1      0.01 1303.5 -6247.3
+ runtime                1      0.00 1303.5 -6247.3
+ num_lang               4      1.28 1302.3 -6246.0
- holiday_month          1     16.80 1320.3 -6189.8
- genre_Family           1     18.38 1321.9 -6184.0
- budget                 1    444.14 1747.7 -4843.6
- vote_count             1    903.26 2206.8 -3723.7

Step:  AIC=-6285.8
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector

                        Df Sum of Sq    RSS     AIC
+ genre_Science.Fiction  1      8.56 1284.6 -6315.7
+ topActor               1      7.86 1285.3 -6313.1
+ genre_Crime            1      5.96 1287.2 -6306.0
+ genre_Animation        1      4.43 1288.7 -6300.3
+ country_Germany        1      4.25 1288.9 -6299.6
+ genre_Romance          1      4.09 1289.0 -6299.0
+ genre_Thriller         1      3.70 1289.4 -6297.5
+ genre_Action           1      3.55 1289.6 -6297.0
+ popularity             1      3.41 1289.7 -6296.5
+ CastGenderAVG          1      3.01 1290.1 -6295.0
+ genre_Adventure        1      2.76 1290.3 -6294.1
+ genre_Western          1      2.48 1290.6 -6293.0
+ genre_Comedy           1      2.43 1290.7 -6292.8
+ country_France         1      2.32 1290.8 -6292.4
+ genre_Drama            1      2.21 1290.9 -6292.0
+ genre_Mystery          1      1.11 1292.0 -6287.9
+ country_Austr          1      1.10 1292.0 -6287.9
+ genre_Documentary      1      1.02 1292.1 -6287.6
+ genre_History          1      0.74 1292.4 -6286.5
+ genre_Music            1      0.74 1292.4 -6286.5
+ country_Canada         1      0.73 1292.4 -6286.5
+ genre_War              1      0.71 1292.4 -6286.4
<none>                               1293.1 -6285.8
+ country_UK             1      0.47 1292.7 -6285.5
+ vote_average           1      0.27 1292.8 -6284.8
+ genre_Foreign          1      0.24 1292.9 -6284.7
+ genre_Horror           1      0.11 1293.0 -6284.2
+ runtime                1      0.11 1293.0 -6284.2
+ genre_TV.Movie         1      0.08 1293.0 -6284.1
+ country_USA            1      0.07 1293.0 -6284.1
+ genre_Fantasy          1      0.02 1293.1 -6283.9
+ original_isEnglish     1      0.00 1293.1 -6283.8
+ num_lang               4      1.25 1291.9 -6282.4
- topDirector            1     10.42 1303.5 -6249.3
- genre_Family           1     16.77 1309.9 -6225.9
- holiday_month          1     17.45 1310.6 -6223.4
- budget                 1    440.53 1733.7 -4880.3
- vote_count             1    893.93 2187.1 -3764.9

Step:  AIC=-6315.67
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction

                        Df Sum of Sq    RSS     AIC
+ topActor               1      9.34 1275.2 -6348.7
+ genre_Crime            1      7.94 1276.6 -6343.4
+ genre_Adventure        1      4.48 1280.1 -6330.4
+ country_Germany        1      4.45 1280.1 -6330.3
+ genre_Animation        1      4.25 1280.3 -6329.6
+ genre_Drama            1      4.14 1280.4 -6329.2
+ popularity             1      3.37 1281.2 -6326.3
+ genre_Romance          1      3.05 1281.5 -6325.1
+ genre_Western          1      2.88 1281.7 -6324.5
+ genre_Thriller         1      2.77 1281.8 -6324.0
+ CastGenderAVG          1      2.47 1282.1 -6322.9
+ country_France         1      2.42 1282.2 -6322.7
+ genre_Action           1      1.93 1282.6 -6320.9
+ genre_Comedy           1      1.68 1282.9 -6320.0
+ genre_History          1      1.15 1283.4 -6318.0
+ genre_Music            1      1.15 1283.4 -6318.0
+ genre_War              1      1.05 1283.5 -6317.6
+ genre_Mystery          1      1.03 1283.5 -6317.5
+ country_Austr          1      0.90 1283.7 -6317.0
+ genre_Documentary      1      0.83 1283.7 -6316.8
+ vote_average           1      0.61 1284.0 -6315.9
<none>                               1284.6 -6315.7
+ country_UK             1      0.48 1284.1 -6315.5
+ country_Canada         1      0.47 1284.1 -6315.4
+ genre_Horror           1      0.40 1284.2 -6315.2
+ genre_Foreign          1      0.19 1284.4 -6314.4
+ country_USA            1      0.10 1284.5 -6314.1
+ genre_TV.Movie         1      0.09 1284.5 -6314.0
+ genre_Fantasy          1      0.06 1284.5 -6313.9
+ runtime                1      0.01 1284.5 -6313.7
+ original_isEnglish     1      0.00 1284.6 -6313.7
+ num_lang               4      1.29 1283.3 -6312.5
- genre_Science.Fiction  1      8.56 1293.1 -6285.8
- topDirector            1     10.76 1295.3 -6277.6
- genre_Family           1     16.05 1300.6 -6258.1
- holiday_month          1     17.29 1301.8 -6253.5
- budget                 1    448.31 1732.9 -4880.4
- vote_count             1    902.13 2186.7 -3763.7

Step:  AIC=-6348.72
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor

                        Df Sum of Sq    RSS     AIC
+ genre_Crime            1      6.53 1268.7 -6371.4
+ country_Germany        1      4.26 1271.0 -6362.8
+ genre_Animation        1      4.10 1271.1 -6362.2
+ popularity             1      3.89 1271.3 -6361.4
+ genre_Adventure        1      3.61 1271.6 -6360.3
+ genre_Romance          1      3.37 1271.8 -6359.4
+ genre_Western          1      2.81 1272.4 -6357.3
+ genre_Thriller         1      2.69 1272.5 -6356.9
+ genre_Action           1      2.65 1272.6 -6356.7
+ CastGenderAVG          1      2.58 1272.6 -6356.4
+ genre_Drama            1      2.38 1272.8 -6355.7
+ country_France         1      2.27 1273.0 -6355.3
+ genre_Comedy           1      1.59 1273.6 -6352.7
+ genre_History          1      1.07 1274.1 -6350.8
+ genre_Music            1      1.07 1274.1 -6350.8
+ genre_War              1      1.03 1274.2 -6350.6
+ genre_Mystery          1      0.89 1274.3 -6350.1
+ country_Austr          1      0.83 1274.4 -6349.9
+ genre_Documentary      1      0.59 1274.6 -6348.9
<none>                               1275.2 -6348.7
+ country_Canada         1      0.52 1274.7 -6348.7
+ country_UK             1      0.31 1274.9 -6347.9
+ runtime                1      0.21 1275.0 -6347.5
+ vote_average           1      0.19 1275.0 -6347.4
+ original_isEnglish     1      0.13 1275.1 -6347.2
+ genre_TV.Movie         1      0.13 1275.1 -6347.2
+ genre_Foreign          1      0.10 1275.1 -6347.1
+ genre_Horror           1      0.08 1275.1 -6347.0
+ genre_Fantasy          1      0.02 1275.2 -6346.8
+ country_USA            1      0.00 1275.2 -6346.7
+ num_lang               4      1.16 1274.0 -6345.1
- topDirector            1      8.14 1283.4 -6320.2
- topActor               1      9.34 1284.6 -6315.7
- genre_Science.Fiction  1     10.04 1285.3 -6313.1
- genre_Family           1     13.65 1288.9 -6299.6
- holiday_month          1     17.98 1293.2 -6283.5
- budget                 1    456.09 1731.3 -4882.8
- vote_count             1    911.47 2186.7 -3761.7

Step:  AIC=-6371.38
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime

                        Df Sum of Sq    RSS     AIC
+ country_Germany        1      3.98 1264.7 -6384.5
+ popularity             1      3.94 1264.7 -6384.3
+ genre_Animation        1      3.78 1264.9 -6383.7
+ genre_Adventure        1      3.12 1265.6 -6381.2
+ genre_Western          1      3.00 1265.7 -6380.7
+ genre_Drama            1      2.36 1266.3 -6378.3
+ genre_Romance          1      2.14 1266.5 -6377.5
+ country_France         1      1.97 1266.7 -6376.8
+ CastGenderAVG          1      1.57 1267.1 -6375.3
+ genre_War              1      1.46 1267.2 -6374.9
+ genre_History          1      1.45 1267.2 -6374.9
+ genre_Music            1      1.45 1267.2 -6374.9
+ genre_Action           1      1.41 1267.3 -6374.7
+ genre_Comedy           1      1.14 1267.5 -6373.7
+ country_Austr          1      0.90 1267.8 -6372.8
+ genre_Thriller         1      0.82 1267.9 -6372.5
<none>                               1268.7 -6371.4
+ country_Canada         1      0.49 1268.2 -6371.3
+ genre_Mystery          1      0.43 1268.2 -6371.0
+ genre_Documentary      1      0.37 1268.3 -6370.8
+ country_UK             1      0.25 1268.4 -6370.3
+ runtime                1      0.24 1268.4 -6370.3
+ original_isEnglish     1      0.13 1268.5 -6369.9
+ vote_average           1      0.12 1268.6 -6369.8
+ genre_TV.Movie         1      0.12 1268.6 -6369.8
+ genre_Foreign          1      0.09 1268.6 -6369.7
+ genre_Horror           1      0.01 1268.7 -6369.4
+ genre_Fantasy          1      0.00 1268.7 -6369.4
+ country_USA            1      0.00 1268.7 -6369.4
+ num_lang               4      1.09 1267.6 -6367.5
- genre_Crime            1      6.53 1275.2 -6348.7
- topDirector            1      7.83 1276.5 -6343.8
- topActor               1      7.94 1276.6 -6343.4
- genre_Family           1     11.34 1280.0 -6330.7
- genre_Science.Fiction  1     11.83 1280.5 -6328.8
- holiday_month          1     16.94 1285.6 -6309.7
- budget                 1    455.35 1724.0 -4901.0
- vote_count             1    914.62 2183.3 -3767.1

Step:  AIC=-6384.46
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany

                        Df Sum of Sq    RSS     AIC
+ popularity             1      3.93 1260.8 -6397.4
+ genre_Animation        1      3.68 1261.0 -6396.5
+ genre_Western          1      3.11 1261.6 -6394.3
+ genre_Adventure        1      3.01 1261.7 -6393.9
+ genre_Drama            1      2.15 1262.5 -6390.6
+ genre_Romance          1      2.08 1262.6 -6390.4
+ CastGenderAVG          1      1.45 1263.3 -6388.0
+ country_France         1      1.43 1263.3 -6387.9
+ genre_Action           1      1.32 1263.4 -6387.5
+ genre_History          1      1.23 1263.5 -6387.1
+ genre_Music            1      1.23 1263.5 -6387.1
+ genre_War              1      1.20 1263.5 -6387.0
+ genre_Comedy           1      1.00 1263.7 -6386.3
+ country_Austr          1      0.99 1263.7 -6386.2
+ genre_Thriller         1      0.68 1264.0 -6385.0
<none>                               1264.7 -6384.5
+ country_Canada         1      0.44 1264.3 -6384.1
+ runtime                1      0.38 1264.3 -6383.9
+ genre_Mystery          1      0.38 1264.3 -6383.9
+ genre_Documentary      1      0.33 1264.4 -6383.7
+ genre_TV.Movie         1      0.13 1264.6 -6383.0
+ genre_Foreign          1      0.07 1264.6 -6382.7
+ vote_average           1      0.07 1264.6 -6382.7
+ country_UK             1      0.05 1264.7 -6382.6
+ original_isEnglish     1      0.03 1264.7 -6382.6
+ genre_Horror           1      0.01 1264.7 -6382.5
+ country_USA            1      0.01 1264.7 -6382.5
+ genre_Fantasy          1      0.00 1264.7 -6382.5
+ num_lang               4      1.13 1263.6 -6380.7
- country_Germany        1      3.98 1268.7 -6371.4
- genre_Crime            1      6.25 1271.0 -6362.8
- topActor               1      7.80 1272.5 -6356.9
- topDirector            1      8.04 1272.7 -6356.0
- genre_Family           1     10.76 1275.5 -6345.8
- genre_Science.Fiction  1     11.99 1276.7 -6341.2
- holiday_month          1     16.21 1280.9 -6325.3
- budget                 1    459.13 1723.8 -4899.5
- vote_count             1    910.72 2175.4 -3782.5

Step:  AIC=-6397.41
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity

                        Df Sum of Sq    RSS     AIC
+ genre_Animation        1      3.30 1257.5 -6408.0
+ genre_Western          1      3.04 1257.7 -6407.0
+ genre_Adventure        1      2.73 1258.0 -6405.8
+ genre_Romance          1      2.17 1258.6 -6403.7
+ genre_Drama            1      2.04 1258.7 -6403.2
+ country_France         1      1.49 1259.3 -6401.1
+ CastGenderAVG          1      1.44 1259.3 -6400.9
+ genre_Action           1      1.32 1259.5 -6400.4
+ genre_War              1      1.23 1259.5 -6400.1
+ genre_History          1      1.22 1259.5 -6400.1
+ genre_Music            1      1.22 1259.5 -6400.1
+ country_Austr          1      1.04 1259.7 -6399.4
+ genre_Comedy           1      1.00 1259.8 -6399.2
+ genre_Thriller         1      0.84 1259.9 -6398.6
<none>                               1260.8 -6397.4
+ country_Canada         1      0.49 1260.3 -6397.3
+ genre_Mystery          1      0.42 1260.4 -6397.0
+ genre_Documentary      1      0.40 1260.4 -6396.9
+ runtime                1      0.36 1260.4 -6396.8
+ vote_average           1      0.14 1260.6 -6395.9
+ genre_TV.Movie         1      0.12 1260.7 -6395.8
+ genre_Foreign          1      0.10 1260.7 -6395.8
+ country_UK             1      0.07 1260.7 -6395.7
+ country_USA            1      0.03 1260.7 -6395.5
+ original_isEnglish     1      0.03 1260.8 -6395.5
+ genre_Fantasy          1      0.00 1260.8 -6395.4
+ genre_Horror           1      0.00 1260.8 -6395.4
+ num_lang               4      1.23 1259.5 -6394.1
- popularity             1      3.93 1264.7 -6384.5
- country_Germany        1      3.97 1264.7 -6384.3
- genre_Crime            1      6.29 1267.1 -6375.5
- topDirector            1      7.81 1268.6 -6369.8
- topActor               1      8.28 1269.1 -6368.0
- genre_Family           1     10.45 1271.2 -6359.8
- genre_Science.Fiction  1     11.99 1272.8 -6354.0
- holiday_month          1     15.94 1276.7 -6339.1
- vote_count             1    441.96 1702.7 -4956.6
- budget                 1    451.31 1712.1 -4930.4

Step:  AIC=-6408.01
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation

                        Df Sum of Sq    RSS     AIC
+ genre_Western          1      3.04 1254.4 -6417.6
+ genre_Adventure        1      2.49 1255.0 -6415.5
+ genre_Romance          1      2.46 1255.0 -6415.4
+ country_France         1      1.62 1255.8 -6412.2
+ genre_Drama            1      1.61 1255.9 -6412.2
+ CastGenderAVG          1      1.60 1255.9 -6412.1
+ genre_War              1      1.18 1256.3 -6410.5
+ genre_History          1      1.15 1256.3 -6410.4
+ genre_Music            1      1.15 1256.3 -6410.4
+ genre_Action           1      1.07 1256.4 -6410.1
+ country_Austr          1      0.99 1256.5 -6409.8
+ genre_Comedy           1      0.98 1256.5 -6409.7
+ runtime                1      0.82 1256.7 -6409.2
+ genre_Thriller         1      0.73 1256.7 -6408.8
<none>                               1257.5 -6408.0
+ country_Canada         1      0.46 1257.0 -6407.8
+ genre_Documentary      1      0.43 1257.0 -6407.6
+ genre_Mystery          1      0.37 1257.1 -6407.4
+ vote_average           1      0.23 1257.2 -6406.9
+ original_isEnglish     1      0.12 1257.3 -6406.4
+ genre_TV.Movie         1      0.12 1257.4 -6406.4
+ genre_Foreign          1      0.10 1257.4 -6406.4
+ country_UK             1      0.03 1257.4 -6406.1
+ genre_Horror           1      0.00 1257.5 -6406.0
+ country_USA            1      0.00 1257.5 -6406.0
+ genre_Fantasy          1      0.00 1257.5 -6406.0
+ num_lang               4      1.18 1256.3 -6404.5
- genre_Animation        1      3.30 1260.8 -6397.4
- genre_Family           1      3.53 1261.0 -6396.5
- popularity             1      3.55 1261.0 -6396.5
- country_Germany        1      3.88 1261.3 -6395.2
- genre_Crime            1      6.00 1263.5 -6387.2
- topDirector            1      7.67 1265.1 -6380.8
- topActor               1      8.16 1265.6 -6378.9
- genre_Science.Fiction  1     11.74 1269.2 -6365.4
- holiday_month          1     15.72 1273.2 -6350.3
- budget                 1    439.47 1696.9 -4971.0
- vote_count             1    443.64 1701.1 -4959.2

Step:  AIC=-6417.61
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western

                        Df Sum of Sq    RSS     AIC
+ genre_Adventure        1      2.81 1251.6 -6426.4
+ genre_Romance          1      2.33 1252.1 -6424.5
+ genre_Drama            1      1.76 1252.7 -6422.4
+ country_France         1      1.62 1252.8 -6421.8
+ genre_War              1      1.19 1253.2 -6420.2
+ CastGenderAVG          1      1.11 1253.3 -6419.8
+ genre_History          1      1.04 1253.4 -6419.6
+ genre_Music            1      1.04 1253.4 -6419.6
+ runtime                1      1.03 1253.4 -6419.5
+ country_Austr          1      0.94 1253.5 -6419.2
+ genre_Thriller         1      0.89 1253.5 -6419.0
+ genre_Comedy           1      0.84 1253.6 -6418.8
+ genre_Action           1      0.83 1253.6 -6418.8
<none>                               1254.4 -6417.6
+ genre_Mystery          1      0.42 1254.0 -6417.2
+ country_Canada         1      0.41 1254.0 -6417.2
+ genre_Documentary      1      0.37 1254.1 -6417.0
+ vote_average           1      0.21 1254.2 -6416.4
+ genre_TV.Movie         1      0.12 1254.3 -6416.1
+ original_isEnglish     1      0.11 1254.3 -6416.0
+ genre_Foreign          1      0.08 1254.3 -6415.9
+ country_UK             1      0.04 1254.4 -6415.8
+ genre_Fantasy          1      0.00 1254.4 -6415.6
+ country_USA            1      0.00 1254.4 -6415.6
+ genre_Horror           1      0.00 1254.4 -6415.6
+ num_lang               4      1.15 1253.3 -6414.0
- genre_Western          1      3.04 1257.5 -6408.0
- genre_Animation        1      3.30 1257.7 -6407.0
- genre_Family           1      3.34 1257.8 -6406.9
- popularity             1      3.48 1257.9 -6406.3
- country_Germany        1      4.00 1258.4 -6404.3
- genre_Crime            1      6.17 1260.6 -6396.1
- topDirector            1      7.85 1262.3 -6389.7
- topActor               1      8.07 1262.5 -6388.8
- genre_Science.Fiction  1     12.24 1266.7 -6373.0
- holiday_month          1     16.15 1270.6 -6358.2
- budget                 1    439.74 1694.2 -4976.9
- vote_count             1    444.17 1698.6 -4964.3

Step:  AIC=-6426.39
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure

                        Df Sum of Sq    RSS     AIC
+ genre_Romance          1      2.67 1249.0 -6434.6
+ genre_Action           1      2.20 1249.4 -6432.8
+ CastGenderAVG          1      1.69 1249.9 -6430.9
+ country_France         1      1.63 1250.0 -6430.7
+ genre_Drama            1      1.39 1250.2 -6429.7
+ genre_War              1      1.30 1250.3 -6429.4
+ genre_Comedy           1      1.07 1250.5 -6428.5
+ genre_History          1      1.05 1250.6 -6428.4
+ genre_Music            1      1.05 1250.6 -6428.4
+ country_Austr          1      0.98 1250.6 -6428.2
+ genre_Thriller         1      0.94 1250.7 -6428.0
+ runtime                1      0.93 1250.7 -6427.9
<none>                               1251.6 -6426.4
+ genre_Documentary      1      0.41 1251.2 -6426.0
+ country_Canada         1      0.40 1251.2 -6425.9
+ genre_Mystery          1      0.35 1251.3 -6425.8
+ vote_average           1      0.21 1251.4 -6425.2
+ original_isEnglish     1      0.14 1251.5 -6424.9
+ country_UK             1      0.12 1251.5 -6424.8
+ genre_TV.Movie         1      0.10 1251.5 -6424.8
+ genre_Foreign          1      0.08 1251.5 -6424.7
+ genre_Fantasy          1      0.05 1251.6 -6424.6
+ genre_Horror           1      0.02 1251.6 -6424.5
+ country_USA            1      0.00 1251.6 -6424.4
+ num_lang               4      1.13 1250.5 -6422.7
- genre_Family           1      2.58 1254.2 -6418.5
- genre_Adventure        1      2.81 1254.4 -6417.6
- genre_Animation        1      3.05 1254.7 -6416.7
- popularity             1      3.22 1254.8 -6416.1
- genre_Western          1      3.36 1255.0 -6415.5
- country_Germany        1      3.90 1255.5 -6413.4
- genre_Crime            1      5.74 1257.4 -6406.4
- topActor               1      7.38 1259.0 -6400.2
- topDirector            1      7.64 1259.3 -6399.2
- genre_Science.Fiction  1     13.67 1265.3 -6376.3
- holiday_month          1     16.00 1267.6 -6367.4
- budget                 1    387.46 1639.1 -5133.6
- vote_count             1    443.83 1695.5 -4971.2

Step:  AIC=-6434.64
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance

                        Df Sum of Sq    RSS     AIC
+ genre_Drama            1      2.01 1246.9 -6440.4
+ country_France         1      1.67 1247.3 -6439.1
+ genre_Action           1      1.64 1247.3 -6439.0
+ genre_War              1      1.23 1247.7 -6437.4
+ CastGenderAVG          1      1.06 1247.9 -6436.7
+ country_Austr          1      0.93 1248.0 -6436.2
+ genre_History          1      0.92 1248.0 -6436.2
+ genre_Music            1      0.92 1248.0 -6436.2
+ runtime                1      0.73 1248.2 -6435.4
+ genre_Documentary      1      0.66 1248.3 -6435.2
+ genre_Comedy           1      0.59 1248.4 -6434.9
<none>                               1249.0 -6434.6
+ genre_Thriller         1      0.47 1248.5 -6434.4
+ vote_average           1      0.32 1248.6 -6433.9
+ country_Canada         1      0.32 1248.6 -6433.9
+ genre_Mystery          1      0.23 1248.7 -6433.5
+ genre_Horror           1      0.19 1248.8 -6433.4
+ country_UK             1      0.14 1248.8 -6433.2
+ original_isEnglish     1      0.14 1248.8 -6433.2
+ genre_TV.Movie         1      0.13 1248.8 -6433.1
+ genre_Foreign          1      0.08 1248.9 -6432.9
+ genre_Fantasy          1      0.06 1248.9 -6432.9
+ country_USA            1      0.00 1249.0 -6432.6
+ num_lang               4      1.19 1247.8 -6431.2
- genre_Romance          1      2.67 1251.6 -6426.4
- genre_Family           1      2.76 1251.7 -6426.1
- genre_Adventure        1      3.16 1252.1 -6424.5
- genre_Western          1      3.23 1252.2 -6424.3
- popularity             1      3.27 1252.2 -6424.1
- genre_Animation        1      3.34 1252.3 -6423.8
- country_Germany        1      3.82 1252.8 -6422.0
- genre_Crime            1      4.45 1253.4 -6419.6
- topActor               1      7.72 1256.7 -6407.1
- topDirector            1      7.73 1256.7 -6407.0
- genre_Science.Fiction  1     12.29 1261.2 -6389.6
- holiday_month          1     15.73 1264.7 -6376.5
- budget                 1    388.69 1637.6 -5135.8
- vote_count             1    444.81 1693.8 -4974.0

Step:  AIC=-6440.39
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance + genre_Drama

                        Df Sum of Sq    RSS     AIC
+ genre_Action           1      2.10 1244.8 -6446.5
+ runtime                1      1.76 1245.2 -6445.2
+ country_France         1      1.56 1245.4 -6444.4
+ CastGenderAVG          1      1.06 1245.9 -6442.5
+ country_Austr          1      0.98 1246.0 -6442.2
+ genre_War              1      0.90 1246.0 -6441.9
+ genre_Thriller         1      0.55 1246.4 -6440.5
+ genre_History          1      0.55 1246.4 -6440.5
+ genre_Music            1      0.55 1246.4 -6440.5
<none>                               1246.9 -6440.4
+ genre_Documentary      1      0.37 1246.6 -6439.8
+ country_Canada         1      0.33 1246.6 -6439.6
+ genre_Mystery          1      0.21 1246.7 -6439.2
+ genre_Comedy           1      0.15 1246.8 -6439.0
+ genre_Foreign          1      0.12 1246.8 -6438.9
+ genre_TV.Movie         1      0.12 1246.8 -6438.8
+ genre_Fantasy          1      0.11 1246.8 -6438.8
+ country_UK             1      0.10 1246.8 -6438.8
+ vote_average           1      0.05 1246.9 -6438.6
+ original_isEnglish     1      0.05 1246.9 -6438.6
+ country_USA            1      0.02 1246.9 -6438.5
+ genre_Horror           1      0.01 1246.9 -6438.4
+ num_lang               4      1.22 1245.7 -6437.1
- genre_Drama            1      2.01 1249.0 -6434.6
- genre_Family           1      2.53 1249.5 -6432.6
- genre_Adventure        1      2.72 1249.7 -6431.9
- genre_Animation        1      2.91 1249.8 -6431.2
- popularity             1      3.22 1250.2 -6430.0
- genre_Romance          1      3.30 1250.2 -6429.7
- genre_Western          1      3.35 1250.3 -6429.5
- country_Germany        1      3.63 1250.6 -6428.4
- genre_Crime            1      4.36 1251.3 -6425.6
- topActor               1      6.34 1253.3 -6418.0
- topDirector            1      7.11 1254.0 -6415.1
- genre_Science.Fiction  1     13.47 1260.4 -6390.8
- holiday_month          1     16.34 1263.3 -6379.9
- budget                 1    377.70 1624.6 -5172.1
- vote_count             1    443.79 1690.7 -4980.6

Step:  AIC=-6446.47
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance + genre_Drama + genre_Action

                        Df Sum of Sq    RSS     AIC
+ runtime                1      1.81 1243.0 -6451.5
+ country_France         1      1.49 1243.3 -6450.2
+ country_Austr          1      0.92 1243.9 -6448.0
+ genre_War              1      0.71 1244.1 -6447.2
+ CastGenderAVG          1      0.65 1244.2 -6447.0
<none>                               1244.8 -6446.5
+ genre_History          1      0.44 1244.4 -6446.2
+ genre_Music            1      0.44 1244.4 -6446.2
+ genre_Mystery          1      0.37 1244.5 -6445.9
+ country_Canada         1      0.32 1244.5 -6445.7
+ genre_Documentary      1      0.27 1244.6 -6445.5
+ genre_Thriller         1      0.26 1244.6 -6445.5
+ genre_Foreign          1      0.13 1244.7 -6445.0
+ genre_Fantasy          1      0.12 1244.7 -6444.9
+ genre_TV.Movie         1      0.10 1244.7 -6444.9
+ country_UK             1      0.10 1244.7 -6444.9
+ vote_average           1      0.08 1244.8 -6444.8
+ genre_Comedy           1      0.07 1244.8 -6444.7
+ original_isEnglish     1      0.03 1244.8 -6444.6
+ country_USA            1      0.03 1244.8 -6444.6
+ genre_Horror           1      0.00 1244.8 -6444.5
+ num_lang               4      1.15 1243.7 -6442.9
- genre_Family           1      1.84 1246.7 -6441.4
- genre_Action           1      2.10 1246.9 -6440.4
- genre_Animation        1      2.43 1247.3 -6439.1
- genre_Drama            1      2.47 1247.3 -6439.0
- genre_Romance          1      2.69 1247.5 -6438.1
- genre_Western          1      3.06 1247.9 -6436.7
- popularity             1      3.16 1248.0 -6436.3
- genre_Crime            1      3.18 1248.0 -6436.2
- country_Germany        1      3.48 1248.3 -6435.1
- genre_Adventure        1      3.99 1248.8 -6433.1
- topActor               1      6.73 1251.6 -6422.6
- topDirector            1      7.50 1252.3 -6419.6
- genre_Science.Fiction  1     11.89 1256.7 -6402.8
- holiday_month          1     16.28 1261.1 -6386.1
- budget                 1    375.42 1620.3 -5183.0
- vote_count             1    444.13 1689.0 -4983.6

Step:  AIC=-6451.46
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance + genre_Drama + genre_Action + 
    runtime

                        Df Sum of Sq    RSS     AIC
+ country_France         1      1.59 1241.4 -6455.6
+ genre_War              1      1.14 1241.9 -6453.9
+ genre_History          1      1.02 1242.0 -6453.4
+ genre_Music            1      1.02 1242.0 -6453.4
+ CastGenderAVG          1      0.97 1242.1 -6453.2
+ country_Austr          1      0.88 1242.1 -6452.9
<none>                               1243.0 -6451.5
+ vote_average           1      0.48 1242.5 -6451.3
+ genre_Mystery          1      0.40 1242.6 -6451.0
+ genre_Documentary      1      0.28 1242.8 -6450.6
+ country_Canada         1      0.27 1242.8 -6450.5
+ genre_Thriller         1      0.23 1242.8 -6450.4
+ genre_Comedy           1      0.19 1242.8 -6450.2
+ country_UK             1      0.15 1242.9 -6450.0
+ genre_Fantasy          1      0.11 1242.9 -6449.9
+ original_isEnglish     1      0.11 1242.9 -6449.9
+ genre_Foreign          1      0.10 1242.9 -6449.9
+ genre_TV.Movie         1      0.08 1243.0 -6449.8
+ country_USA            1      0.03 1243.0 -6449.6
+ genre_Horror           1      0.00 1243.0 -6449.5
+ num_lang               4      1.43 1241.6 -6449.0
- runtime                1      1.81 1244.8 -6446.5
- genre_Family           1      2.09 1245.1 -6445.4
- genre_Action           1      2.15 1245.2 -6445.2
- genre_Romance          1      2.56 1245.6 -6443.6
- genre_Animation        1      3.01 1246.0 -6441.8
- popularity             1      3.05 1246.1 -6441.7
- genre_Crime            1      3.23 1246.3 -6441.0
- genre_Western          1      3.38 1246.4 -6440.4
- genre_Drama            1      3.59 1246.6 -6439.6
- genre_Adventure        1      3.68 1246.7 -6439.3
- country_Germany        1      3.76 1246.8 -6439.0
- topActor               1      7.15 1250.2 -6425.9
- topDirector            1      8.00 1251.0 -6422.7
- genre_Science.Fiction  1     11.72 1254.8 -6408.4
- holiday_month          1     15.09 1258.1 -6395.5
- budget                 1    354.07 1597.1 -5250.1
- vote_count             1    437.58 1680.6 -5005.5

Step:  AIC=-6455.6
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance + genre_Drama + genre_Action + 
    runtime + country_France

                        Df Sum of Sq    RSS     AIC
+ genre_War              1      1.06 1240.4 -6457.7
+ CastGenderAVG          1      1.00 1240.4 -6457.5
+ genre_History          1      0.97 1240.5 -6457.3
+ genre_Music            1      0.97 1240.5 -6457.3
+ country_Austr          1      0.91 1240.5 -6457.1
<none>                               1241.4 -6455.6
+ vote_average           1      0.42 1241.0 -6455.2
+ genre_Mystery          1      0.38 1241.1 -6455.1
+ genre_Documentary      1      0.30 1241.1 -6454.8
+ country_USA            1      0.24 1241.2 -6454.5
+ genre_Thriller         1      0.21 1241.2 -6454.4
+ country_Canada         1      0.20 1241.2 -6454.4
+ genre_Comedy           1      0.13 1241.3 -6454.1
+ genre_Fantasy          1      0.10 1241.3 -6454.0
+ genre_Foreign          1      0.09 1241.3 -6454.0
+ genre_TV.Movie         1      0.09 1241.3 -6453.9
+ country_UK             1      0.03 1241.4 -6453.7
+ genre_Horror           1      0.00 1241.4 -6453.6
+ original_isEnglish     1      0.00 1241.4 -6453.6
+ num_lang               4      1.34 1240.1 -6452.8
- country_France         1      1.59 1243.0 -6451.5
- runtime                1      1.91 1243.3 -6450.2
- genre_Family           1      2.01 1243.5 -6449.8
- genre_Action           1      2.08 1243.5 -6449.6
- genre_Romance          1      2.59 1244.0 -6447.6
- genre_Crime            1      3.07 1244.5 -6445.8
- popularity             1      3.09 1244.5 -6445.7
- genre_Animation        1      3.17 1244.6 -6445.4
- country_Germany        1      3.23 1244.7 -6445.1
- genre_Western          1      3.39 1244.8 -6444.5
- genre_Drama            1      3.47 1244.9 -6444.2
- genre_Adventure        1      3.67 1245.1 -6443.4
- topActor               1      7.12 1248.6 -6430.1
- topDirector            1      8.11 1249.5 -6426.3
- genre_Science.Fiction  1     11.71 1253.2 -6412.5
- holiday_month          1     15.18 1256.6 -6399.3
- budget                 1    351.43 1592.9 -5260.9
- vote_count             1    436.96 1678.4 -5009.8

Step:  AIC=-6457.69
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance + genre_Drama + genre_Action + 
    runtime + country_France + genre_War

                        Df Sum of Sq    RSS     AIC
+ country_Austr          1      0.91 1239.5 -6459.2
+ CastGenderAVG          1      0.73 1239.7 -6458.5
+ genre_History          1      0.54 1239.8 -6457.8
+ genre_Music            1      0.54 1239.8 -6457.8
<none>                               1240.4 -6457.7
+ genre_Mystery          1      0.42 1240.0 -6457.3
+ vote_average           1      0.38 1240.0 -6457.2
+ genre_Documentary      1      0.29 1240.1 -6456.8
+ country_USA            1      0.27 1240.1 -6456.7
+ genre_Thriller         1      0.26 1240.1 -6456.7
+ country_Canada         1      0.20 1240.2 -6456.5
+ genre_Fantasy          1      0.13 1240.3 -6456.2
+ genre_Comedy           1      0.10 1240.3 -6456.1
+ genre_TV.Movie         1      0.09 1240.3 -6456.0
+ genre_Foreign          1      0.08 1240.3 -6456.0
+ country_UK             1      0.02 1240.4 -6455.8
+ genre_Horror           1      0.00 1240.4 -6455.7
+ original_isEnglish     1      0.00 1240.4 -6455.7
- genre_War              1      1.06 1241.4 -6455.6
+ num_lang               4      1.43 1239.0 -6455.2
- country_France         1      1.51 1241.9 -6453.9
- genre_Action           1      1.86 1242.2 -6452.5
- genre_Family           1      1.91 1242.3 -6452.3
- runtime                1      2.32 1242.7 -6450.7
- genre_Romance          1      2.48 1242.9 -6450.1
- country_Germany        1      3.07 1243.5 -6447.8
- popularity             1      3.11 1243.5 -6447.7
- genre_Drama            1      3.18 1243.6 -6447.4
- genre_Animation        1      3.26 1243.6 -6447.1
- genre_Crime            1      3.39 1243.8 -6446.6
- genre_Western          1      3.45 1243.8 -6446.3
- genre_Adventure        1      3.70 1244.1 -6445.4
- topActor               1      7.19 1247.6 -6431.9
- topDirector            1      8.18 1248.6 -6428.1
- genre_Science.Fiction  1     12.13 1252.5 -6413.0
- holiday_month          1     15.31 1255.7 -6400.8
- budget                 1    351.22 1591.6 -5262.7
- vote_count             1    436.06 1676.4 -5013.4

Step:  AIC=-6459.2
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance + genre_Drama + genre_Action + 
    runtime + country_France + genre_War + country_Austr

                        Df Sum of Sq    RSS     AIC
+ CastGenderAVG          1      0.73 1238.8 -6460.0
+ genre_History          1      0.53 1238.9 -6459.3
+ genre_Music            1      0.53 1238.9 -6459.3
<none>                               1239.5 -6459.2
+ genre_Mystery          1      0.40 1239.1 -6458.8
+ vote_average           1      0.37 1239.1 -6458.6
+ country_USA            1      0.35 1239.1 -6458.6
+ genre_Documentary      1      0.28 1239.2 -6458.3
+ genre_Thriller         1      0.24 1239.2 -6458.1
+ country_Canada         1      0.20 1239.3 -6458.0
+ genre_Fantasy          1      0.13 1239.3 -6457.7
- country_Austr          1      0.91 1240.4 -6457.7
+ genre_Foreign          1      0.10 1239.4 -6457.6
+ genre_TV.Movie         1      0.09 1239.4 -6457.6
+ genre_Comedy           1      0.08 1239.4 -6457.5
+ country_UK             1      0.02 1239.5 -6457.3
+ genre_Horror           1      0.00 1239.5 -6457.2
+ original_isEnglish     1      0.00 1239.5 -6457.2
- genre_War              1      1.05 1240.5 -6457.1
+ num_lang               4      1.39 1238.1 -6456.6
- country_France         1      1.53 1241.0 -6455.3
- genre_Action           1      1.81 1241.3 -6454.2
- genre_Family           1      1.94 1241.4 -6453.7
- runtime                1      2.28 1241.8 -6452.4
- genre_Romance          1      2.45 1241.9 -6451.7
- country_Germany        1      3.14 1242.6 -6449.1
- popularity             1      3.15 1242.6 -6449.0
- genre_Animation        1      3.21 1242.7 -6448.8
- genre_Drama            1      3.21 1242.7 -6448.8
- genre_Western          1      3.41 1242.9 -6448.0
- genre_Crime            1      3.46 1242.9 -6447.8
- genre_Adventure        1      3.71 1243.2 -6446.8
- topActor               1      7.09 1246.6 -6433.8
- topDirector            1      8.20 1247.7 -6429.6
- genre_Science.Fiction  1     11.96 1251.4 -6415.1
- holiday_month          1     15.21 1254.7 -6402.7
- budget                 1    352.01 1591.5 -5261.1
- vote_count             1    434.59 1674.1 -5018.2

Step:  AIC=-6460.02
revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance + genre_Drama + genre_Action + 
    runtime + country_France + genre_War + country_Austr + CastGenderAVG

                        Df Sum of Sq    RSS     AIC
<none>                               1238.8 -6460.0
+ genre_History          1      0.45 1238.3 -6459.8
+ genre_Music            1      0.45 1238.3 -6459.8
+ genre_Mystery          1      0.42 1238.3 -6459.6
+ genre_Documentary      1      0.37 1238.4 -6459.5
+ country_USA            1      0.37 1238.4 -6459.4
- CastGenderAVG          1      0.73 1239.5 -6459.2
+ vote_average           1      0.27 1238.5 -6459.1
- genre_War              1      0.78 1239.5 -6459.0
+ genre_Thriller         1      0.22 1238.5 -6458.9
+ country_Canada         1      0.22 1238.5 -6458.9
- country_Austr          1      0.91 1239.7 -6458.5
+ genre_Fantasy          1      0.12 1238.6 -6458.5
+ genre_TV.Movie         1      0.10 1238.7 -6458.4
+ genre_Foreign          1      0.09 1238.7 -6458.4
+ genre_Comedy           1      0.06 1238.7 -6458.3
+ country_UK             1      0.01 1238.7 -6458.1
+ original_isEnglish     1      0.00 1238.8 -6458.0
+ genre_Horror           1      0.00 1238.8 -6458.0
+ num_lang               4      1.44 1237.3 -6457.6
- genre_Action           1      1.44 1240.2 -6456.5
- country_France         1      1.57 1240.3 -6456.0
- genre_Family           1      1.95 1240.7 -6454.5
- genre_Romance          1      1.98 1240.7 -6454.3
- runtime                1      2.53 1241.3 -6452.2
- genre_Western          1      3.04 1241.8 -6450.2
- genre_Crime            1      3.07 1241.8 -6450.1
- country_Germany        1      3.10 1241.8 -6450.0
- popularity             1      3.11 1241.9 -6450.0
- genre_Drama            1      3.30 1242.0 -6449.2
- genre_Animation        1      3.36 1242.1 -6449.0
- genre_Adventure        1      3.94 1242.7 -6446.8
- topActor               1      7.12 1245.9 -6434.5
- topDirector            1      7.98 1246.7 -6431.2
- genre_Science.Fiction  1     11.76 1250.5 -6416.6
- holiday_month          1     15.30 1254.0 -6403.1
- budget                 1    351.97 1590.7 -5261.4
- vote_count             1    434.23 1673.0 -5019.3

Call:
lm(formula = revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance + genre_Drama + genre_Action + 
    runtime + country_France + genre_War + country_Austr + CastGenderAVG, 
    data = tmdb1)

Coefficients:
           (Intercept)              vote_count                  budget           genre_Family1          holiday_month1  
               0.05092                 0.53105                 0.38772                 0.08003                 0.12465  
          topDirector1  genre_Science.Fiction1               topActor1            genre_Crime1        country_Germany1  
              -0.15669                -0.16967                -0.09180                -0.07667                -0.10273  
            popularity        genre_Animation1          genre_Western1        genre_Adventure1          genre_Romance1  
               0.04088                 0.15026                -0.19713                 0.09240                 0.05540  
          genre_Drama1           genre_Action1                 runtime         country_France1              genre_War1  
              -0.06040                -0.04868                 0.02730                -0.07472                -0.07833  
        country_Austr1           CastGenderAVG  
              -0.09208                -0.01349  
summary(full_model)

Call:
lm(formula = revenue ~ ., data = tmdb1)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.8025 -0.1693  0.0017  0.1322  9.7054 

Coefficients: (1 not defined because of singularities)
                        Estimate Std. Error t value Pr(>|t|)    
(Intercept)             0.097735   0.069251   1.411 0.158218    
topDirector1           -0.157837   0.028409  -5.556 2.91e-08 ***
topActor1              -0.088891   0.017824  -4.987 6.34e-07 ***
CastGenderAVG          -0.012994   0.008203  -1.584 0.113245    
budget                  0.389801   0.010915  35.712  < 2e-16 ***
popularity              0.044147   0.011870   3.719 0.000202 ***
runtime                 0.034621   0.009499   3.645 0.000271 ***
vote_average           -0.007514   0.008898  -0.844 0.398442    
vote_count              0.530880   0.013282  39.969  < 2e-16 ***
holiday_month1          0.123663   0.016298   7.588 3.89e-14 ***
num_lang1              -0.051374   0.061032  -0.842 0.399971    
num_lang2              -0.030458   0.063359  -0.481 0.630742    
num_lang3              -0.079123   0.067955  -1.164 0.244343    
num_lang4              -0.003689   0.071847  -0.051 0.959051    
original_isEnglish1     0.025913   0.038366   0.675 0.499442    
country_USA1           -0.032793   0.025847  -1.269 0.204603    
country_UK1            -0.011485   0.024131  -0.476 0.634117    
country_Germany1       -0.100186   0.030246  -3.312 0.000932 ***
country_France1        -0.073767   0.032297  -2.284 0.022415 *  
country_Canada1        -0.033457   0.033374  -1.002 0.316161    
country_Austr1         -0.094867   0.049595  -1.913 0.055830 .  
genre_Action1          -0.043894   0.021654  -2.027 0.042714 *  
genre_Adventure1        0.093637   0.024105   3.885 0.000104 ***
genre_Fantasy1         -0.019503   0.028095  -0.694 0.487603    
genre_Science.Fiction1 -0.164991   0.025564  -6.454 1.20e-10 ***
genre_Crime1           -0.068867   0.023378  -2.946 0.003236 ** 
genre_Drama1           -0.050728   0.018944  -2.678 0.007436 ** 
genre_Thriller1        -0.013727   0.020770  -0.661 0.508686    
genre_Animation1        0.155027   0.042564   3.642 0.000273 ***
genre_Family1           0.082812   0.030178   2.744 0.006090 ** 
genre_Western1         -0.193558   0.057979  -3.338 0.000849 ***
genre_Comedy1           0.009134   0.018760   0.487 0.626351    
genre_Romance1          0.053442   0.020785   2.571 0.010167 *  
genre_Horror1           0.019980   0.027710   0.721 0.470921    
genre_Mystery1         -0.029991   0.030161  -0.994 0.320093    
genre_History1         -0.064536   0.041295  -1.563 0.118164    
genre_War1             -0.064576   0.047029  -1.373 0.169781    
genre_Music1                  NA         NA      NA       NA    
genre_Documentary1      0.064657   0.053128   1.217 0.223659    
genre_Foreign1          0.032684   0.089623   0.365 0.715361    
genre_TV.Movie1        -0.112887   0.180786  -0.624 0.532380    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.5092 on 4761 degrees of freedom
Multiple R-squared:  0.7428,    Adjusted R-squared:  0.7407 
F-statistic: 352.5 on 39 and 4761 DF,  p-value: < 2.2e-16
dflm1 <- lm(revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance + genre_Drama + genre_Action + 
    runtime, data = tmdb1)
summary(dflm1)

Call:
lm(formula = revenue ~ vote_count + budget + genre_Family + holiday_month + 
    topDirector + genre_Science.Fiction + topActor + genre_Crime + 
    country_Germany + popularity + genre_Animation + genre_Western + 
    genre_Adventure + genre_Romance + genre_Drama + genre_Action + 
    runtime, data = tmdb1)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.7865 -0.1655  0.0053  0.1315  9.7417 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)    
(Intercept)             0.045886   0.016215   2.830 0.004676 ** 
vote_count              0.532746   0.012983  41.033  < 2e-16 ***
budget                  0.388321   0.010520  36.911  < 2e-16 ***
genre_Family1           0.082813   0.029208   2.835 0.004597 ** 
holiday_month1          0.123720   0.016236   7.620 3.04e-14 ***
topDirector1           -0.156691   0.028239  -5.549 3.03e-08 ***
genre_Science.Fiction1 -0.168836   0.025145  -6.714 2.11e-11 ***
topActor1              -0.091977   0.017533  -5.246 1.62e-07 ***
genre_Crime1           -0.077725   0.022055  -3.524 0.000429 ***
country_Germany1       -0.112386   0.029560  -3.802 0.000145 ***
popularity              0.040446   0.011808   3.425 0.000619 ***
genre_Animation1        0.141833   0.041666   3.404 0.000669 ***
genre_Western1         -0.206501   0.057265  -3.606 0.000314 ***
genre_Adventure1        0.089030   0.023665   3.762 0.000171 ***
genre_Romance1          0.062104   0.019781   3.140 0.001702 ** 
genre_Drama1           -0.062777   0.016887  -3.717 0.000204 ***
genre_Action1          -0.058691   0.020400  -2.877 0.004033 ** 
runtime                 0.022660   0.008585   2.639 0.008330 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.5098 on 4783 degrees of freedom
Multiple R-squared:  0.741, Adjusted R-squared:  0.7401 
F-statistic: 805.1 on 17 and 4783 DF,  p-value: < 2.2e-16
dflm2 <- lm(revenue ~ budget + runtime + vote_count + genre_Crime + genre_Drama + genre_Animation + genre_Family + + holiday_month:vote_count + topActor:vote_count + topDirector:vote_count + topDirector:budget + genre_Action:vote_count + genre_Adventure:vote_count + genre_Crime:vote_count + genre_Romance:vote_count + genre_Science.Fiction:vote_count + genre_Western:vote_count + holiday_month + vote_average:vote_count + budget:vote_count + runtime:vote_count + vote_count:popularity , data = tmdb1)
summary(dflm2)

Call:
lm(formula = revenue ~ budget + runtime + vote_count + genre_Crime + 
    genre_Drama + genre_Animation + genre_Family + +holiday_month:vote_count + 
    topActor:vote_count + topDirector:vote_count + topDirector:budget + 
    genre_Action:vote_count + genre_Adventure:vote_count + genre_Crime:vote_count + 
    genre_Romance:vote_count + genre_Science.Fiction:vote_count + 
    genre_Western:vote_count + holiday_month + vote_average:vote_count + 
    budget:vote_count + runtime:vote_count + vote_count:popularity, 
    data = tmdb1)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.9928 -0.1596 -0.0271  0.0987  5.3394 

Coefficients:
                                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)                       -0.027209   0.012152  -2.239 0.025201 *  
budget                             0.227194   0.010568  21.499  < 2e-16 ***
runtime                            0.028598   0.007768   3.682 0.000234 ***
vote_count                         0.604785   0.019757  30.611  < 2e-16 ***
genre_Crime1                      -0.068385   0.019209  -3.560 0.000374 ***
genre_Drama1                      -0.063201   0.014650  -4.314 1.63e-05 ***
genre_Animation1                   0.198554   0.037992   5.226 1.80e-07 ***
genre_Family1                      0.126021   0.025939   4.858 1.22e-06 ***
holiday_month1                     0.110184   0.014685   7.503 7.39e-14 ***
vote_count:holiday_month1          0.154833   0.014467  10.702  < 2e-16 ***
vote_count:topActor1              -0.173807   0.015955 -10.893  < 2e-16 ***
vote_count:topDirector1           -0.112115   0.019680  -5.697 1.29e-08 ***
budget:topDirector1                0.100599   0.023552   4.271 1.98e-05 ***
vote_count:genre_Action1          -0.076905   0.016967  -4.533 5.96e-06 ***
vote_count:genre_Adventure1        0.116528   0.018595   6.266 4.02e-10 ***
vote_count:genre_Crime1           -0.126473   0.021737  -5.818 6.33e-09 ***
vote_count:genre_Romance1          0.145046   0.026706   5.431 5.88e-08 ***
vote_count:genre_Science.Fiction1 -0.098211   0.017221  -5.703 1.25e-08 ***
vote_count:genre_Western1         -0.393108   0.047283  -8.314  < 2e-16 ***
vote_count:vote_average           -0.085615   0.010699  -8.002 1.52e-15 ***
budget:vote_count                  0.095212   0.005404  17.619  < 2e-16 ***
runtime:vote_count                 0.045113   0.007598   5.938 3.10e-09 ***
vote_count:popularity             -0.008930   0.001951  -4.577 4.85e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4601 on 4778 degrees of freedom
Multiple R-squared:  0.7893,    Adjusted R-squared:  0.7883 
F-statistic: 813.5 on 22 and 4778 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2)) # init 4 charts in 1 panel
plot(dflm2)

par(mfrow = c(1, 2))
hist(dflm1$residuals)
qqnorm(dflm1$residuals)
qqline(dflm1$residuals)

library(grid)
library(gridExtra)

Attaching package: ‘gridExtra’

The following object is masked from ‘package:dplyr’:

    combine
g_budget <- ggplot(data=NULL, aes(x=tmdb1$budget, y=dflm2$residuals)) +
  geom_point()
g_votecount <- ggplot(data=NULL, aes(x=tmdb1$vote_count, y=dflm2$residuals)) +
  geom_point()
g_popularity <- ggplot(data=NULL, aes(x=tmdb1$popularity, y=dflm2$residuals)) +
  geom_point()
grid.arrange(g_budget, g_votecount,g_popularity)

3.1.2. Perfrom stepwise regression to identify the top predictors To explore the data, created several histograms of runtime, vote_count, vote_average, popularity to understand their distribution
options(repr.plot.width=6, repr.plot.height=4) 
g1<-ggplot(tmdb1,aes(x=tmdb1$runtime))+geom_histogram(binwidth=5,aes(y=..density..),fill="green4")
g2<-ggplot(tmdb1,aes(x=tmdb1$vote_count))+geom_histogram(binwidth=50,aes(y=..count..),fill="red")
g3<-ggplot(tmdb1,aes(x=tmdb1$popularity))+geom_histogram(binwidth=1,aes(y=..count..),fill="green4")
g4<-ggplot(tmdb1,aes(x=tmdb1$vote_average))+geom_histogram(binwidth=5,aes(y=..count..),fill="red")
grid.arrange(g1,g2,g3,g4,nrow=2,ncol=2)

3.1.3. Split dataset to find test R-squared for linear model
set.seed(1)    # for reproducible example
# training set
train <- sample(1:nrow(tmdb1),0.70*nrow(tmdb1))   # random sample of 75% of data
fit <- lm(revenue ~ + budget + runtime + vote_count + genre_Crime + genre_Drama + genre_Animation + genre_Family + + holiday_month:vote_count + topActor:vote_count + topDirector:vote_count + topDirector:budget + genre_Action:vote_count + genre_Adventure:vote_count + genre_Crime:vote_count + genre_Romance:vote_count + genre_Science.Fiction:vote_count + genre_Western:vote_count + holiday_month + vote_average:vote_count + budget:vote_count + runtime:vote_count + vote_count:popularity , data = tmdb1[train,])
test <- -train
test.pred <- predict(fit,newdata=tmdb1[test,])
test.y    <- tmdb1[test,]$revenue
SS.total      <- sum((test.y - mean(test.y))^2)
SS.residual   <- sum((test.y - test.pred)^2)
SS.regression <- sum((test.pred - mean(test.y))^2)
SS.total - (SS.regression+SS.residual)
[1] 1.270162
# [1] 8958890
# NOT the fraction of variability explained by the model
test.rsq <- 1 - SS.residual/SS.total  
test.rsq
[1] 0.7571309

3.2. Ridge Regression

x <- model.matrix(revenue ~ budget + runtime + vote_count + genre_Crime + genre_Drama + genre_Animation + genre_Family + + holiday_month:vote_count + topActor:vote_count + topDirector:vote_count + topDirector:budget + genre_Action:vote_count + genre_Adventure:vote_count + genre_Crime:vote_count + genre_Romance:vote_count + genre_Science.Fiction:vote_count + genre_Western:vote_count + holiday_month + vote_average:vote_count + budget:vote_count + runtime:vote_count + vote_count:popularity, tmdb1)
y = tmdb1 %>%
  dplyr::select(revenue) %>%
  unlist() %>%
  as.numeric()
3.2.1. Split dataset to train and test
set.seed(1)

train = tmdb1 %>%
  sample_frac(0.7)

test = tmdb1 %>%
  setdiff(train)

x_train = model.matrix(revenue ~ budget + runtime + vote_count + genre_Crime + genre_Drama + genre_Animation + genre_Family + + holiday_month:vote_count + topActor:vote_count + topDirector:vote_count + topDirector:budget + genre_Action:vote_count + genre_Adventure:vote_count + genre_Crime:vote_count + genre_Romance:vote_count + genre_Science.Fiction:vote_count + genre_Western:vote_count + holiday_month + vote_average:vote_count + budget:vote_count + runtime:vote_count + vote_count:popularity, train)[,-1]

x_test = model.matrix(revenue ~ budget + runtime + vote_count + genre_Crime + genre_Drama + genre_Animation + genre_Family + + holiday_month:vote_count + topActor:vote_count + topDirector:vote_count + topDirector:budget + genre_Action:vote_count + genre_Adventure:vote_count + genre_Crime:vote_count + genre_Romance:vote_count + genre_Science.Fiction:vote_count + genre_Western:vote_count + holiday_month + vote_average:vote_count + budget:vote_count + runtime:vote_count + vote_count:popularity, test)[,-1]

y_train = train %>%
  dplyr::select(revenue) %>%
  unlist() %>%
  as.numeric()

y_test = test %>%
  dplyr::select(revenue) %>%
  unlist() %>%
  as.numeric()
3.2.2. Create a set of lambda values and train the model
lambda <- 10^seq(2, -2, length = 100)
ridge_mod = glmnet(x_train, y_train, alpha=0, lambda = lambda)
plot(ridge_mod, xvar='lambda', label = TRUE, main = "Ridge Regression")

#Running 10-fold cross validation.
set.seed(0)
cv.ridge.out = cv.glmnet(x_train, y_train, lambda = lambda, alpha = 0, nfolds = 10)
plot(cv.ridge.out, main = "Ridge Regression\n")

bestlambda.ridge = cv.ridge.out$lambda.min
bestlambda.ridge
[1] 0.01
log(bestlambda.ridge)
[1] -4.60517
#What is the test MSE associated with this best value of lambda?
ridge.bestlambdatrain = predict(ridge_mod, s = bestlambda.ridge, newx = x_test)
mean((ridge.bestlambdatrain - y_test)^2)
[1] 0.2335615
y_predicted <- predict(ridge_mod, s = bestlambda.ridge, newx = x_test)
# Sum of Squares Total and Error
sst <- sum((y_test - mean(y_test))^2)
sse <- sum((y_predicted - y_test)^2)
# R squared
rsq_ridge <- 1 - sse / sst
rsq_ridge
[1] 0.7583279

3.2. Lasso Regression

lasso.models = glmnet(x_train, y_train, alpha = 1, lambda = lambda)
plot(lasso.models)

#Running 10-fold cross validation.
set.seed(0)
cv.lasso.out = cv.glmnet(x_train, y_train, lambda = lambda, alpha = 1, nfolds = 10)
plot(cv.lasso.out, main = "Lasso Regression\n")

bestlambda.lasso = cv.lasso.out$lambda.min
bestlambda.lasso
[1] 0.01
#What is the test MSE associated with this best value of lambda?
lasso.bestlambdatrain = predict(lasso.models, s = bestlambda.lasso, newx = x_test)
mean((lasso.bestlambdatrain - y_test)^2)
[1] 0.244208
#Fit the lasso regression on the train dataset using the best lambda value
#from cross validation; inspect the coefficient estimates.
lasso.out = glmnet(x_train, y_train, alpha = 1)
predict(lasso.out, type = "coefficients", s = bestlambda.lasso)
23 x 1 sparse Matrix of class "dgCMatrix"
                                             1
(Intercept)                       -0.046415751
budget                             0.276864206
runtime                            0.008955606
vote_count                         0.473686578
genre_Crime1                      -0.037614750
genre_Drama1                      -0.035471313
genre_Animation1                   0.200287305
genre_Family1                      0.106017417
holiday_month1                     0.094069163
vote_count:holiday_month1          0.147330369
vote_count:topActor1              -0.007233109
vote_count:topDirector1           -0.106619403
budget:topDirector1                0.073059891
vote_count:genre_Action1           .          
vote_count:genre_Adventure1        0.077844069
vote_count:genre_Crime1           -0.102668311
vote_count:genre_Romance1          0.195088919
vote_count:genre_Science.Fiction1 -0.113163159
vote_count:genre_Western1          .          
vote_count:vote_average           -0.069897402
budget:vote_count                  0.079538822
runtime:vote_count                 0.001773404
vote_count:popularity             -0.002170431
#Let's also inspect the MSE of our final lasso model on all our data.
lasso.bestlambda = predict(lasso.out, s = bestlambda.lasso, newx = x_test)
mean((lasso.bestlambda - y_test)^2)
[1] 0.2442298
y_predicted <- predict(lasso.out, s = bestlambda.ridge, newx = x_test)
# Sum of Squares Total and Error
sst <- sum((y_test - mean(y_test))^2)
sse <- sum((y_predicted - y_test)^2)
# R squared
rsq_lasso <- 1 - sse / sst
rsq_lasso
[1] 0.7472891

3.3. Regression Trees

#Create training and testing datasets
library(rpart)
regTree <- rpart(revenue ~ vote_count + budget +  holiday_month + 
    topDirector +  topActor +  popularity + genre_Family + genre_Crime + genre_Animation + 
    genre_Adventure + genre_Romance + genre_Drama + genre_Action + genre_Science.Fiction +
    runtime, method="anova", data=tmdb1 )
plot(regTree, uniform=TRUE, 
 main="Regression Tree for predicting Revenue")
 text(regTree, use.n=TRUE, cex = 0.6)

library(rpart.plot)
rpart.plot(regTree,digits = 4)

p.rpart <- predict(regTree,tmdb1)
mean((p.rpart-tmdb1$revenue)^2)
[1] 0.27388
# Sum of Squares Total and Error
sst <- sum((tmdb1$revenue - mean(tmdb1$revenue))^2)
sse <- sum((p.rpart - tmdb1$revenue)^2)
# R squared
rsq_tree <- 1 - sse / sst
rsq_tree
[1] 0.7260629

3.4. Random Forest

set.seed(45)
train.index <- sample(row.names(tmdb1), dim(tmdb1)[1]*0.6)
test.index <- setdiff(row.names(tmdb1), train.index)
train <- tmdb1[train.index, ]
test <- tmdb1[test.index, ]
set.seed(100)
library(randomForest)
rf <- randomForest(revenue ~ vote_count + budget + genre_Family + holiday_month + 
topDirector +topActor +popularity + genre_Crime + genre_Animation + 
genre_Adventure + genre_Romance + genre_Drama + genre_Action + genre_Science.Fiction +
runtime,data=train,ntree=500)
pred_rf <- predict(rf,test)
mean((pred_rf - test$revenue)^2)
[1] 0.1856559
# Sum of Squares Total and Error
sst <- sum((test$revenue - mean(test$revenue))^2)
sse <- sum((pred_rf - test$revenue)^2)
# R squared
rsq_rf <- 1 - sse / sst
rsq_rf
[1] 0.7886412
pred_rf1 <- predict(rf,tmdb1)
mean((pred_rf1-tmdb1$revenue)^2)
[1] 0.1082728
#install reptree packages
have.packages <- installed.packages()
cran.packages <- c('devtools','plotrix','randomForest','tree')
to.install <- setdiff(cran.packages, have.packages[,1])
if(length(to.install)>0) install.packages(to.install)
library(devtools)
if(!('reprtree' %in% installed.packages())){
  install_github('araastat/reprtree')
}
for(p in c(cran.packages, 'reprtree')) eval(substitute(library(pkg), list(pkg=p)))
library(randomForest)
library(reprtree)
reprtree:::plot.getTree(rf)

set.seed(100)
library(randomForest)
rf_node <- randomForest(revenue ~ vote_count + budget + genre_Family + holiday_month + 
topDirector +topActor +popularity + genre_Crime + genre_Animation + 
genre_Adventure + genre_Romance + genre_Drama + genre_Action + genre_Science.Fiction +
runtime,data=train, maxnodes=50, ntree=500)
reprtree:::plot.getTree(rf_node)

Conclusion

Predictive Model R-squared Comment
Linear Regression 0.7571309 Linear regression has too many non-linearities. The R-squared on complete dataset was 0.789
Ridge Regression 0.7583279 Ridge Regression add a penalty on sum of squared beta. This has the effect of “shrinking” large values of beta towards zero. As a result the ridge regression estimates are often more accurate.
Lasso Regression 0.7472891 The LASSO works in a similar way to ridge regression except that it uses an L1 penalty. LASSO is not quite as computational efficient as ridge regression.
Regression Trees 0.7260629 A decision tree is built on an entire dataset, using all the features/variables of interest, using all the features/variables of interest
Random Froests 0.7886412 Accuracy keeps increasing as you increase the number of trees, but becomes constant at certain point. Unlike decision tree, it won’t create highly biased model and reduces the variance.

Based on our scaled data our top predictors changed in Linear and Random Forest models. In Random Forest we have vote_count, popularity, genre_adventure, top_actor, holiday_month to be top5 predictors.

We conclude that Random Forests is the best model to predict the revenue. The point of RF is to prevent overfitting. It does this by creating random subsets of the features and building smaller (shallow) trees using the subsets and then it combines the subtrees. The downside of RF is it can be slow if you have a single process but it can be parallelized.

LS0tCnRpdGxlOiAiVE1EQiBNb3ZpZSBEYXRhIEV4cGxvcmF0aW9uIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyBJbnRyb2R1Y3Rpb24KClRoZSBnb2FsIG9mIHRoZSBwcm9qZWN0IGlzIHRvIGRlcml2ZSBpbnNpZ2h0cyBvbiB0aGUgVE1EQiBtb3ZpZSBkYXRzZXQgYW5kIHBlcmZvcm0gcmVncmVzc2lvbiBtb2RlbHMgdG8gcHJlZGljdCByZXZlbnVlIG9mIHRoZSBtb3ZpZS4gVGhpcyBtb2RlbCBjb3VsZCBiZSBsZXZlcmFnZWQgYnkgcHJvZHVjdGlvbiBjb21wYW5pZXMgZm9yIG1ha2luZyBnby9uby1nbyBzY3JlZW5pbmcgZGVjaXNpb25zLgoKVE1EQiBNb3ZpZSBEYXRhc2V0IGF2YWlsYWJsZSBvbiBLYWdnbGUuIExpbms6IGh0dHBzOi8vd3d3LmthZ2dsZS5jb20vdG1kYi90bWRiLW1vdmllLW1ldGFkYXRhCgojIyMjIyBMb2FkIFBhY2thZ2VzCgpgYGB7cn0KbGlicmFyeShwbHlyKSAjZGF0YSBtYW5pcHVsYXRpb24KbGlicmFyeSh0aWR5dmVyc2UpIyBkYXRhIG1hbmlwdWxhdGlvbgpsaWJyYXJ5KGZvcm1hdHRhYmxlKSMgdGFibGUKbGlicmFyeShzcGxpdHN0YWNrc2hhcGUpICMgc3BsaXQgY29sdW1ucwpsaWJyYXJ5KGpzb25saXRlKSAjSlNPTiBmb3JtYXQgCmxpYnJhcnkod29yZGNsb3VkKSAjd29yZGNsb3VkCmxpYnJhcnkoUkNvbG9yQnJld2VyKSAjIENvbG9yIFRoZW1lCmxpYnJhcnkoZ2d0aGVtZXMpICNUaGVtZXMgZm9yIHBsb3QKbGlicmFyeSh0bSkgIyBTZW50aW1lbnQgQW5hbHlzaXMgCmxpYnJhcnkoUlNlbnRpbWVudCkgIyBTZW50aW1lbnQgQW5hbHlzaXMKbGlicmFyeSh6b28pICMgVGltZSAKbGlicmFyeShzdHJpbmdyKSAjU3RyaW5nIE1hbmlwdWxhdGlvbgpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoVklNKQpsaWJyYXJ5KG1pY2UpCmxpYnJhcnkodmNkKQpyZXF1aXJlKGNhcikKbGlicmFyeSh0YWJwbG90KQpsaWJyYXJ5KFBlcmZvcm1hbmNlQW5hbHl0aWNzKQpsaWJyYXJ5KE1BU1MpCmxpYnJhcnkoZ2xtbmV0KQpsaWJyYXJ5KGRwbHlyKQpgYGAKCiMjIyMjIExvYWQgVE1EQiBkYXRzZXQKCmBgYHtyfQptb3ZpZT1yZWFkX2NzdigidG1kYl81MDAwX21vdmllcy5jc3YiLGNvbF9uYW1lcz1UUlVFLG5hPSJOQSIpCmNyZWRpdD1yZWFkX2NzdigidG1kYl81MDAwX2NyZWRpdHMuY3N2Iixjb2xfbmFtZXM9VFJVRSxuYT0iTkEiKQpgYGAKClRoZSBNb3ZpZSBhbmQgY3JlZGl0IGRhdGEgY29udGFpbiBjb2x1bW5zIG9mIG5lc3RlZCBKU09OIHdoaWNoIG5lZWQgdG8gYmUgc3BsaXQgaW50byBzZXBhcmF0ZSBjb2x1bW5zIGZvciBhY2N1cmF0ZSBhbmFseXNpcy4gSW4gdGhlIG1vdmllIGRhdGFzZXQsIHBvcHVsYXJpdHkgbWVhbnMgdGhlIG51bWJlciBvZiB2aWV3cyBvZiB0aGUgbW92aWUgaW4gdGhlIHdlYnNpdGUgYW5kIHZvdGVfYXZlcmFnZSB0ZWxscyB1cyBhYm91dCB0aGUgbW92aWUgcmF0aW5nLgoKCmBgYHtyfQpnbGltcHNlKG1vdmllKQpgYGAKCgoKYGBge3J9CmdsaW1wc2UoY3JlZGl0KQpgYGAKCiMjMS4gRGF0YSBFeHBsb3JhdG9yeSBBbmFseXNpcwoKYGBge3J9CmdlbnJlZGY9bW92aWUgJT4lIGZpbHRlcihuY2hhcihnZW5yZXMpPjIpICU+JSBtdXRhdGUoanM9bGFwcGx5KGdlbnJlcyxmcm9tSlNPTikpICU+JSB1bm5lc3QoanMpICU+JSBkcGx5cjo6c2VsZWN0KGlkLHRpdGxlLGdlbnJlPW5hbWUpICNDb252ZXJ0IEpTT04gZm9ybWF0IGludG8gZGF0YSBmcmFtZQpzbGljZShnZW5yZWRmKQpgYGAKCiMjIyMjMS4xLiBXb3JkY2xvdWQ6IEdlbnJlIFJlcHJlc2VudGF0aW9uCgogIApgYGB7cn0KI0EgbG9vayBhdCB0aGUgZ2VucmUgdmFyaWV0eSBpbiBvdXIgZGF0YXNldApkZiA8LSBhcy5kYXRhLmZyYW1lKHRhYmxlKGdlbnJlZGYkZ2VucmUpKQpkZjIgPC0gd2l0aChkZixkZltvcmRlcihGcmVxLGRlY3JlYXNpbmcgPSBUUlVFKSxdKQoKI3dvcmRjbG91ZAp3b3JkY2xvdWQod29yZHM9ZGYyJFZhcjEsZnJlcT1kZjIkRnJlcSxtaW4uZnJlcT0xMDAsbWF4LndvcmRzID0gMjAscmFuZG9tLm9yZGVyPUZBTFNFLHJhbmRvbS5jb2xvcj1UUlVFLHJvdC5wZXI9MC4zNSxjb2xvcnMgPSBicmV3ZXIucGFsKDIwLCJEYXJrMiIpLHNjYWxlPWMoNSwuMikpCmBgYAoKCiMjIyMjMS4yLiBXb3JkY2xvdWQ6IFByb2R1Y3Rpb24gQ29tcGFuaWVzCgpgYGB7cn0KcHJvZHVjdGlvbj1tb3ZpZSAlPiUgZmlsdGVyKG5jaGFyKHByb2R1Y3Rpb25fY29tcGFuaWVzKT4yKSAlPiUgbXV0YXRlKGpzPWxhcHBseShwcm9kdWN0aW9uX2NvbXBhbmllcyxmcm9tSlNPTikpICU+JSB1bm5lc3QoanMpICU+JSBkcGx5cjo6c2VsZWN0KGJ1ZGdldCxyZXZlbnVlLGNvbXBhbnk9bmFtZSkKbGFwcGx5KHByb2R1Y3Rpb24sY2xhc3MpCmBgYAoKCmBgYHtyfQpkZiA8LSBhcy5kYXRhLmZyYW1lKHRhYmxlKHByb2R1Y3Rpb24kY29tcGFueSkpCmRmMiA8LSB3aXRoKGRmLGRmW29yZGVyKEZyZXEsZGVjcmVhc2luZyA9IFRSVUUpLF0pCmRmMgoKd29yZGNsb3VkKHdvcmRzPWRmMiRWYXIxLGZyZXE9ZGYyJEZyZXEsbWluLmZyZXE9NTAsbWF4LndvcmRzID0gMjUsY29sb3IgPXJhaW5ib3coNyksc2NhbGU9YygzLDAuNSkpCmBgYAoKIyMjIyMjMS4zLiBXaGljaCB5ZWFyIGhhcyBzZWVuIG1heGltdW0gcmVsZWFzZSBvZiBtb3ZpZXMgPwoKYGBge3J9Cm1vdmllJFllYXI9YXMuZmFjdG9yKGZvcm1hdChtb3ZpZSRyZWxlYXNlX2RhdGUsIiVZIikpCm1vdmllJERhdGU9YXMuZmFjdG9yKGZvcm1hdChtb3ZpZSRyZWxlYXNlX2RhdGUsIiVkIikpCm1vdmllJG1vbnRoPW1vbnRoLmFiYlsoYXMuZmFjdG9yKGZvcm1hdChtb3ZpZSRyZWxlYXNlX2RhdGUsIiVtIikpKV0KCmRmIDwtIGFzLmRhdGEuZnJhbWUodGFibGUobW92aWUkbW9udGgpKQpkZjIgPC0gd2l0aChkZixkZltvcmRlcihGcmVxLGRlY3JlYXNpbmcgPSBUUlVFKSxdKQpkZjIKCmRmMiAlPiUKZ2dwbG90KGFlcyhyZW9yZGVyKFZhcjEsRnJlcSksRnJlcSxmaWxsPVZhcjEpKStnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIpK3RoZW1lKHBsb3QudGl0bGU9ZWxlbWVudF90ZXh0KHNpemU9MTQsZmFjZT0iaXRhbGljIixjb2xvdXI9InJlZCIpLGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlPTkwKSxsZWdlbmQucG9zaXRpb249Im5vbmUiKStsYWJzKHg9IiIseT0iVG90YWwgbnVtYmVyIG9mIG1vdmllcyByZWxlYXNlZCIsdGl0bGU9Ik51bWJlciBvZiBNb3ZpZXMgUmVsZWFzZXMgcGVyIG1vbnRoIikrY29vcmRfZmxpcCgpK2dlb21fbGFiZWwoYWVzKGxhYmVsPUZyZXEpKQpgYGAKCiMjIyMjMS40LiBEb2VzIGhpZ2ggYnVkZ2V0IG1vdmllIG5lY2Vzc2FyaWx5IG1lYW4gaGlnaCBwb3B1bGFyaXR5IGFtb25nIHZpZXdlcnM/CgpgYGB7cn0KZGI9bW92aWUgJT4lIGxlZnRfam9pbihjcmVkaXQsYnk9YygiaWQiPSJtb3ZpZV9pZCIpKQpkYl9jcmVkaXQ9ZGIgJT4lIGZpbHRlcihuY2hhcihjYXN0KT4yKSAlPiUgbXV0YXRlKGpzPWxhcHBseShjYXN0LGZyb21KU09OKSkgJT4lIHVubmVzdChqcykKYGBgCgoKYGBge3J9CmdldF9jb3IgPC0gZnVuY3Rpb24oZGYpewogICAgbSA8LSBjb3IoZGYkeCxkZiR5LCB1c2U9InBhaXJ3aXNlLmNvbXBsZXRlLm9icyIpOwogICAgZXEgPC0gc3Vic3RpdHV0ZShleHByPXI9PWNvcixlbnY9bGlzdChjb3I9Zm9ybWF0KG0sIGRpZ2l0cyA9IDQpKSkKICAgIHJldHVybihhcy5jaGFyYWN0ZXIoYXMuZXhwcmVzc2lvbihlcSkgKSkgICAgICAgICAgICAgICAgCn0KCnRlbXA9ZGJfY3JlZGl0ICU+JSAgZHBseXI6OnNlbGVjdChidWRnZXQscG9wdWxhcml0eSkgJT4lIGRpc3RpbmN0KCkgCmdncGxvdCh0ZW1wLGFlcyhidWRnZXQscG9wdWxhcml0eSkpK3N0YXRfYmluX2hleChiaW5zPTE1KStzY2FsZV9maWxsX2Rpc3RpbGxlcihwYWxldHRlPSJTcGVjdHJhbCIpK3N0YXRfc21vb3RoKG1ldGhvZD0ibG0iLGNvbG9yPSJvcmNoaWQiLHNpemU9Mikrc2NhbGVfeF9jb250aW51b3VzKGxhYmVscz1zY2FsZXM6OmNvbW1hKQpgYGAKCmBgYHtyfQp0ZW1wPWRiX2NyZWRpdCAlPiUgIGRwbHlyOjpzZWxlY3QoYnVkZ2V0LHZvdGVfY291bnQpICU+JSBkaXN0aW5jdCgpIApnZ3Bsb3QodGVtcCxhZXMoYnVkZ2V0LHZvdGVfY291bnQpKStzdGF0X2Jpbl9oZXgoYmlucz0xNSkrc2NhbGVfZmlsbF9kaXN0aWxsZXIocGFsZXR0ZT0iU3BlY3RyYWwiKStzdGF0X3Ntb290aChtZXRob2Q9ImxtIixjb2xvcj0ib3JjaGlkIixzaXplPTIpK3NjYWxlX3hfY29udGludW91cyhsYWJlbHM9c2NhbGVzOjpjb21tYSkKYGBgCmBgYHtyfQp0ZW1wPWRiX2NyZWRpdCAlPiUgIGRwbHlyOjpzZWxlY3QoYnVkZ2V0LHJldmVudWUpICU+JSBkaXN0aW5jdCgpIApnZ3Bsb3QodGVtcCxhZXMoYnVkZ2V0LHJldmVudWUpKStzdGF0X2Jpbl9oZXgoYmlucz0xNSkrc2NhbGVfZmlsbF9kaXN0aWxsZXIocGFsZXR0ZT0iU3BlY3RyYWwiKStzdGF0X3Ntb290aChtZXRob2Q9ImxtIixjb2xvcj0ib3JjaGlkIixzaXplPTIpK3NjYWxlX3hfY29udGludW91cyhsYWJlbHM9c2NhbGVzOjpjb21tYSkKYGBgCkZyb20gdGhlIGFib3ZlIHBsb3RzIHdlIGNvdWxkIHJlYWxpc2UgdGhhdCBldmVuIHdoZW4gcHJvZHVjdGlvbiBjb21wYW5pZXMgc3BlbmQgbG90IG9mIG1vbmV5IG9uIHRoZSBtb3ZpZSBpdCB3aWxsIG5vdCB5aWVsZCB0aGUgcmV2ZW51ZSB1bmxlc3MgaW1wb3J0YW50IGZlYXR1cmVzIGxpa2Ugc3RvcnkgYW5kIGRpcmVjdGlvbiBtYWtlIGFuIGltcGFjdCB0byB0aGUgYXVkaWVuY2UuCgojIyMjIzEuNS4gQ29tcGFyaW5nIFJldmVudWUgd2l0aCB2b3RlX2NvdW50LCBwb3B1bGFyaXR5IGFuZCBydW50aW1lCgpgYGB7cn0KZ2dwbG90KGRiX2NyZWRpdCwgYWVzKHggPXJ1bnRpbWUsIHkgPXJldmVudWUpKSsKICBnZW9tX3BvaW50KHNpemU9MSwgYWVzKGNvbG91cj12b3RlX2NvdW50KSkgKwogIGxhYnModGl0bGUgPSAiUnVudGltZSBWcy4gUmV2ZW51ZSBhbmQgdm90ZSBjb3VudCIsIAogICAgICAgeCA9ICJSdW50aW1lIiwgeSA9ICJSZXZlbnVlIikKYGBgCgpgYGB7cn0KZ2dwbG90KGRiX2NyZWRpdCwgYWVzKHggPXBvcHVsYXJpdHksIHkgPXJldmVudWUpKSsKICBnZW9tX3BvaW50KHNpemU9MSwgYWVzKGNvbG91cj12b3RlX2NvdW50KSkgKwogIGxhYnModGl0bGUgPSAiUG9wdWxhcml0eSBWcy4gUmV2ZW51ZSBhbmQgdm90ZSBjb3VudCIsIAogICAgICAgeCA9ICJwb3B1bGFyaXR5IiwgeSA9ICJSZXZlbnVlIikKYGBgCgoKYGBge3J9CmdncGxvdChhZXMoeCA9IGxvZyhyZXZlbnVlKSwgeSA9IGxvZyhidWRnZXQpKSwgZGF0YSA9IGRiX2NyZWRpdCkgKwogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjEsIHBvc2l0aW9uID0gcG9zaXRpb25faml0dGVyKGggPSAwKSkgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICdsbScsIGNvbG9yID0gJ3JlZCcpICsKICB5bGFiKCdCdWRnZXQnKSArCiAgeGxhYignUmV2ZW51ZScpICsKICBnZ3RpdGxlKCdCdWRnZXQgVnMgUmV2ZW51ZScpCmBgYAoKYGBge3J9CmdncGxvdChhZXMoeCA9IGxvZyhyZXZlbnVlKSwgeSA9IHZvdGVfYXZlcmFnZSksIGRhdGEgPSBkYl9jcmVkaXQpICsKICBnZW9tX3BvaW50KGFscGhhID0gMC4xLCBwb3NpdGlvbiA9IHBvc2l0aW9uX2ppdHRlcihoID0gMCkpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAnbG0nLCBjb2xvciA9ICdyZWQnKSArCiAgeWxhYignVE1EQiBzY29yZScpICsKICB4bGFiKCdSZXZlbnVlJykgKwogIGdndGl0bGUoJ1RNREIgc2NvcmUgdnMgUmV2ZW51ZScpCmBgYAoKCmBgYHtyfQpnZ3Bsb3QoYWVzKHggPSBsb2coYnVkZ2V0KSwgeSA9IHZvdGVfYXZlcmFnZSksIGRhdGEgPSBkYl9jcmVkaXQpICsKICBnZW9tX3BvaW50KGFscGhhID0gMC4xLCBwb3NpdGlvbiA9IHBvc2l0aW9uX2ppdHRlcihoID0gMCkpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAnbG0nLCBjb2xvciA9ICdyZWQnKSArCiAgeWxhYignVE1EQiBzY29yZScpICsKICB4bGFiKCdCdWRnZXQnKSArCiAgZ2d0aXRsZSgnVE1EQiBzY29yZSB2cyBCdWRnZXQnKQpgYGAKCiMjIyMjMS42LiBBdmVyYWdlIE1vdmllIFJhdGluZwoKYGBge3J9CmdncGxvdChkYl9jcmVkaXQsYWVzKHZvdGVfYXZlcmFnZSkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gMTAwKSArCiAgZ2VvbV92bGluZSh4aW50ZXJjZXB0ID0gbWVhbih0bWRiJHZvdGVfYXZlcmFnZSxuYS5ybSA9IFRSVUUpLGNvbG91ciA9ICJyZWQiKSArIAogIHlsYWIoIkNvdW50IG9mIE1vdmllcyIpICsgCiAgeGxhYigiQXZlcmFnZSBWb3RlIikgKyAKICBnZ3RpdGxlKCJIaXN0b2dyYW0gZm9yIGF2ZXJhZ2Ugdm90ZSByYXRpbmciKQpgYGAKCk1lYW4gOiA2LjA5MjA4MwoKTGV0IHVzIGxvb2sgYXQgdGhlIHRvcCAyMCBtb3ZpZXMgd2l0aCBoaWdoZXN0IGF2ZXJhZ2Vfdm90ZSB3aXRoIGNvbG9yIGFjY29yZGluZyB0byB2b3RlIGNvdW50LgoKSGVyZSwgbW92aWVzIHdpdGggdm90ZSBjb3VudCA+IDUwMCBhcmUgY29uc2lkZXJlZCBhcyBtb3ZpZXMgd2l0aCBsZXNzIHZvdGUgY291bnRzIGFuZCBoaWdoIHJhdGluZyBjYW4gYmUgYSBtaXNsZWFkaW5nIHN0YXRpc3RpYy4KCmBgYHtyfQpoZWFkKGRiX2NyZWRpdCkKYGBgCgojIyMjIzEuNy4gVHJhc2Zvcm1hdGlvbiBvZiBKU09OIGNvbHVtbiB0byB1bmlxdWUgY29sdW1ucyB0byB0aGUgbW92aWUgZGF0YXNldAoKYGBge3J9CiMjIHRyYW5zZm9ybWF0aW9uIG9mICJrZXl3b3JkcyIgY29sdW1uIGludG8gdGliYmxlCmtleXdvcmRzIDwtIG1vdmllICU+JSAgICAKICBmaWx0ZXIobmNoYXIoa2V5d29yZHMpID4gMikgJT4lICAgICAgICAgICAgICAgICAjIGZpdGVyIG91dCBibGFuayBrZXl3b3JkcyBmaWVsZAogIG11dGF0ZSggICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgY3JlYXRlIGEgbmV3IGZpZWxkIAogICAganMgPSBsYXBwbHkoa2V5d29yZHMsIGZyb21KU09OKSAgICAgICAgICAgICAgICMgY29udGFpbmluZyBhIExJU1Qgb2Yga2V5d29yZCBhbmQgdmFsdWUgcGFpcnMKICApICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIGNhbGxlZCBpZCBhbmQgbmFtZQogIHVubmVzdChqcykgJT4lICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgdHVybiBlYWNoIGtleXdvcmQvdmFsdWUgcGFpcnMgaW4gdGhlIExJU1QgaW50byBhIHJvdwogIGRwbHlyOjpzZWxlY3QoaWQsIHRpdGxlLCBrZXl3b3JkcyA9IG5hbWUpCiMjIENvbWJpbmluZyB0aGUga2V5d29yZHMgb2YgYSBtb3ZpZSBpbiBhIHNpbmdsZSBjb2x1bW4Ka2V5d29yZHMgPC0gYWdncmVnYXRlKGtleXdvcmRzIH4uLGRhdGEgPSBrZXl3b3JkcywgcGFzdGUsIGNvbGxhcHNlID0gIiwiKQojQ29tYmluaW5nIHRoZSBnZW5yZXMgb2YgYSBtb3ZpZSBpbiBhIHNpbmdsZSBjb2x1bW4KZ2VucmVzIDwtIG1vdmllICU+JSBmaWx0ZXIobmNoYXIoZ2VucmVzKSA+IDIpICU+JSAgICAgICAgICAgICAgICAgICAKICBtdXRhdGUoIGpzID0gbGFwcGx5KGdlbnJlcywgZnJvbUpTT04pKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgdW5uZXN0KGpzKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgZHBseXI6OnNlbGVjdChpZCwgdGl0bGUsIGdlbnJlcyA9IG5hbWUpIApnZW5yZXMgPC0gYWdncmVnYXRlKGdlbnJlcyB+LixkYXRhID0gZ2VucmVzLCBwYXN0ZSwgY29sbGFwc2UgPSAiLCIpCiMgQ29tYmluaW5nIHByb2R1Y3Rpb25fY29tcGFuaWVzCnByb2R1Y3Rpb25fY29tcGFuaWVzIDwtIG1vdmllICU+JSBmaWx0ZXIobmNoYXIocHJvZHVjdGlvbl9jb21wYW5pZXMpID4gMikgJT4lICAgICAgICAgICAgICAgICAgIAogIG11dGF0ZSgganMgPSBsYXBwbHkocHJvZHVjdGlvbl9jb21wYW5pZXMsIGZyb21KU09OKSkgJT4lICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogIHVubmVzdChqcykgJT4lICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogIGRwbHlyOjpzZWxlY3QoaWQsIHRpdGxlLCBwcm9kdWN0aW9uX2NvbXBhbmllcyA9IG5hbWUpIApwcm9kdWN0aW9uX2NvbXBhbmllcyA8LSBhZ2dyZWdhdGUocHJvZHVjdGlvbl9jb21wYW5pZXMgfi4sZGF0YSA9IHByb2R1Y3Rpb25fY29tcGFuaWVzLCBwYXN0ZSwgY29sbGFwc2UgPSAiLCIpCiMgQ29tYmluaW5nIHByb2R1Y3Rpb24gY291bnRyaWVzCnByb2R1Y3Rpb25fY291bnRyaWVzIDwtIG1vdmllICU+JSAgICAKICBmaWx0ZXIobmNoYXIocHJvZHVjdGlvbl9jb3VudHJpZXMpID4gMikgJT4lICAgICAKICBtdXRhdGUoICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgIGpzID0gbGFwcGx5KHByb2R1Y3Rpb25fY291bnRyaWVzLCBmcm9tSlNPTikgICAKICApICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogIHVubmVzdChqcykgJT4lICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogIGRwbHlyOjpzZWxlY3QoaWQsIHRpdGxlLCBwcm9kdWN0aW9uX2NvdW50cmllcyA9IG5hbWUpCmNvdW50cmllcyA8LSBtb3ZpZSAlPiUgICAgCiAgZmlsdGVyKG5jaGFyKHByb2R1Y3Rpb25fY291bnRyaWVzKSA+IDIpICU+JSAgICAgCiAgbXV0YXRlKCAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICBqcyA9IGxhcHBseShwcm9kdWN0aW9uX2NvdW50cmllcywgZnJvbUpTT04pICAgCiAgKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICB1bm5lc3QoanMpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICBkcGx5cjo6c2VsZWN0KGlkLCB0aXRsZSwgcHJvZHVjdGlvbl9jb3VudHJpZXMgPSBuYW1lKQpwcm9kdWN0aW9uX2NvdW50cmllcyA8LSBhZ2dyZWdhdGUocHJvZHVjdGlvbl9jb3VudHJpZXMgfi4sZGF0YSA9IHByb2R1Y3Rpb25fY291bnRyaWVzLCBwYXN0ZSwgY29sbGFwc2UgPSAiLCIpCiMgY29tYmluaW5nIHNwb2tlbiBsYW5ndWFnZXMKc3Bva2VuX2xhbmd1YWdlcyA8LSBtb3ZpZSAlPiUgICAgCiAgZmlsdGVyKG5jaGFyKHNwb2tlbl9sYW5ndWFnZXMpID4gMikgJT4lICAgICAgICAKICBtdXRhdGUoICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgIGpzID0gbGFwcGx5KHNwb2tlbl9sYW5ndWFnZXMsIGZyb21KU09OKSAgICAgIAogICkgJT4lICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgdW5uZXN0KGpzKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICBkcGx5cjo6c2VsZWN0KGlkLCB0aXRsZSwgc3Bva2VuX2xhbmd1YWdlcyA9IGlzb182MzlfMSkgCnNwb2tlbl9sYW5ndWFnZXMgPC0gYWdncmVnYXRlKHNwb2tlbl9sYW5ndWFnZXMgfi4sZGF0YSA9IHNwb2tlbl9sYW5ndWFnZXMsIHBhc3RlLCBjb2xsYXBzZSA9ICIsIikKYGBgCgpgYGB7cn0KbW92aWVzIDwtIHN1YnNldChtb3ZpZSwgc2VsZWN0ID0gLWMoZ2VucmVzLCBrZXl3b3JkcywgcHJvZHVjdGlvbl9jb21wYW5pZXMsIHByb2R1Y3Rpb25fY291bnRyaWVzLHNwb2tlbl9sYW5ndWFnZXMpKQpnbGltcHNlKG1vdmllcykKYGBgCgpgYGB7cn0KIyBEcm9wcGVkIGV4aXN0aW5nIHVuZm9ybWF0dGVkIGNvbHVtbnMgaW4gdGhlIG1haW4gZGF0YXNldCwgY3JlYXRpbmcgYSBuZXcgZGF0YXNldCAibW92aWVzIgptb3ZpZXMgPC0gc3Vic2V0KG1vdmllLCBzZWxlY3QgPSAtYyhnZW5yZXMsIGtleXdvcmRzLCBwcm9kdWN0aW9uX2NvbXBhbmllcywgcHJvZHVjdGlvbl9jb3VudHJpZXMsIHNwb2tlbl9sYW5ndWFnZXMpKQptb3ZpZXMgPC0gbW92aWVzICU+JQogIGZ1bGxfam9pbihrZXl3b3JkcywgYnkgPSBjKCJpZCIsICJ0aXRsZSIpKSAlPiUKICBmdWxsX2pvaW4oZ2VucmVzLCBieSA9IGMoImlkIiwgInRpdGxlIikpICU+JQogIGZ1bGxfam9pbihwcm9kdWN0aW9uX2NvbXBhbmllcywgYnkgPSBjKCJpZCIsICJ0aXRsZSIpKSAlPiUKICBmdWxsX2pvaW4ocHJvZHVjdGlvbl9jb3VudHJpZXMsIGJ5ID0gYygiaWQiLCAidGl0bGUiKSkgJT4lCiAgZnVsbF9qb2luKHNwb2tlbl9sYW5ndWFnZXMsIGJ5ID0gYygiaWQiLCAidGl0bGUiKSkKZ2xpbXBzZShtb3ZpZXMpCmBgYAoKIyMjIyMxLjguIE1vdmllIFJhdGluZyBWcyBCdWRnZXQKCmBgYHtyfQptb3ZpZXMgJT4lIGRwbHlyOjpzZWxlY3QodGl0bGUsdm90ZV9hdmVyYWdlLHZvdGVfY291bnQsIGJ1ZGdldCkgJT4lIAogIGZpbHRlcih2b3RlX2NvdW50ID4gNTAwICkgJT4lIGFycmFuZ2UoZGVzYyh2b3RlX2F2ZXJhZ2UpKSAlPiUgaGVhZCgyMCkgJT4lCiAgZ2dwbG90KGFlcyh4ID0gdGl0bGUseSA9IHZvdGVfYXZlcmFnZSxmaWxsID0gYnVkZ2V0ICkpICsgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpICsgY29vcmRfZmxpcCh5bGltID0gYyg3LCA5KSkgKwogIHNjYWxlX2ZpbGxfY29udGludW91cygpCmBgYAoKIyMjIyMxLjkuIFBvcHVsYXJpdHkgdnMgQnVkZ2V0IGFyZSBub3QgaGlnaGx5IGNvcnJlbGF0ZWQgCgpgYGB7cn0KI1RvcCAyMCBtb3ZpZXMgYnkgcG9wdWxhcml0eSwgY29sb3IgYWNjb3JkaW5nIHRvIHZvdGUgY291bnQgOgptb3ZpZXMgJT4lIGRwbHlyOjpzZWxlY3QodGl0bGUsdm90ZV9hdmVyYWdlLHZvdGVfY291bnQsIHBvcHVsYXJpdHkpICU+JSAKICBmaWx0ZXIodm90ZV9jb3VudCA+IDMwMCApICU+JSAgaGVhZCgzMCkgJT4lCiAgZ2dwbG90KGFlcyh4ID0gdGl0bGUseSA9IHBvcHVsYXJpdHksIGZpbGwgPSB2b3RlX2NvdW50KSkgKyBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKyBjb29yZF9mbGlwKCkgKwogIHNjYWxlX2ZpbGxfY29udGludW91cygpCmBgYAoKIyMjIyMxLjEwLiBBbmFseXNpbmcgTW92aWUgR2VucmUKCmBgYHtyfQpnZW5yZTEgPC0gQ29ycHVzKFZlY3RvclNvdXJjZShnZW5yZXMkZ2VucmVzKSkKZHRtIDwtIERvY3VtZW50VGVybU1hdHJpeChnZW5yZTEpCmdlbnJlX2ZyZXEgPC0gY29sU3Vtcyhhcy5tYXRyaXgoZHRtKSkKZnJlcSA8LSBzb3J0KGNvbFN1bXMoYXMubWF0cml4KGR0bSkpLCBkZWNyZWFzaW5nID0gVFJVRSkgCmdlbnJlX3dmcmFtZSA8LSBkYXRhLmZyYW1lKHdvcmQgPSBuYW1lcyhnZW5yZV9mcmVxKSwgZnJlcSA9IGdlbnJlX2ZyZXEpCmdncGxvdChnZW5yZV93ZnJhbWUsIGFlcyh4ID0gcmVvcmRlcih3b3JkLC1mcmVxKSwgeSA9IGZyZXEpKSArICAKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKyAKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpKSArCiAgZ2d0aXRsZSgiTW92aWUgR2VucmUgZnJlcXVlbmN5IGdyYXBoIikgKyAKICB4bGFiKCJHZW5yZSIpICsgCiAgeWxhYigiRnJlcXVlbmN5IikKYGBgCgojIyMjIzEuMTEuIENvbXBhcmUgUHJvZml0IHdpdGggdGhlIG1vdmllIHJhdGluZyBhbmQgYnVkZ2V0CgpgYGB7cn0KI0FkZGluZyBuZXcgY29sdW1ucyBncm9zcyBhbmQgZ3Jvc3NfZmxhZyAKbW92aWVzIDwtIG1vdmllcyAlPiUKICBtdXRhdGUoZ3Jvc3MgPSByZXZlbnVlIC0gYnVkZ2V0LCBncm9zc19mbGFnID0gaWZlbHNlKGdyb3NzIDwgMCwgIkxvc3MiLCAiUHJvZml0IikpCgpsaWJyYXJ5KHBsb3RseSkKcGxvdF9seShtb3ZpZXMsIHggPSB+dm90ZV9hdmVyYWdlLCB5ID0gfmJ1ZGdldCwgeiA9IH5ncm9zcy8xMDAwMDAwLCAKICAgICAgICBjb2xvciA9IH5ncm9zc19mbGFnLCBjb2xvcnMgPSBjKCcjQkYzODJBJywgJyMwQzRCOEUnKSAsc2l6ZSA9IEkoMykpICU+JQogIGFkZF9tYXJrZXJzKCkgJT4lCiAgbGF5b3V0KHNjZW5lID0gbGlzdCh4YXhpcyA9IGxpc3QodGl0bGUgPSAnQXZlcmFnZSB2b3RlJyksCiAgICAgICAgICAgICAgICAgICAgICB5YXhpcyA9IGxpc3QodGl0bGUgPSAnQnVkZ2V0JyksCiAgICAgICAgICAgICAgICAgICAgICB6YXhpcyA9IGxpc3QodGl0bGUgPSAnR3Jvc3MgKG1pbGxpb24gJCknKSksCiAgICAgICAgIHRpdGxlID0gIklOVEVSQUNUSVZFIDNEIFNjYXR0ZXIgcGxvdDogQXZlcmFnZSB2b3RlIHZzIEJ1ZGdldCB2cyBHcm9zcyIsCiAgICAgICAgIHNob3dsZWdlbmQgPSBGQUxTRSkKYGBgCgpUaGUgYmx1ZSBkb3RzIHJlcHJlc2VudHMgdGhlIG1vdmllIHdpdGggbW92aWVzIG1ha2luZyBwcm9maXRzLCB3ZSBjYW4gaG92ZXIgb3ZlciB0aGUgcGxvdCB0byB1bmRlcnNhdG5kIHRoZSBiZWhhdmlvdXIgb2YgdGhlIHZhcmlhYmxlcyB2b3RlX2F2ZXJhZ2UgYW5mIGJ1ZGdldCB3aXRoIHJlc3BlY3QgdG8gR3Jvc3MuCgojIzIuIERhdGEgQ2xlYW5pbmcKCmBgYHtyfQptb3ZpZXMgPC0gcmVhZC5jc3YoInRtZGJfNTAwMF9tb3ZpZXMuY3N2IiwgaGVhZGVyID0gVFJVRSwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQpjcmVkaXRzIDwtIHJlYWQuY3N2KCJ0bWRiXzUwMDBfY3JlZGl0cy5jc3YiLGhlYWRlciA9IFRSVUUsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSApCmBgYAoKYGBge3J9CgpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShqc29ubGl0ZSkKYGBgCgpgYGB7cn0KI21vdmllcyA8LSB0bWRiXzUwMDBfbW92aWVzCiNjcmVkaXRzIDwtIHRtZGJfNTAwMF9jcmVkaXRzCgpUb3AuMTAwLkRpcmVjdG9ycy5Xb3JraW5nLlRvZGF5IDwtIHJlYWQuY3N2KCJUb3AgMTAwIERpcmVjdG9ycyBXb3JraW5nIFRvZGF5LmNzdiIsIGhlYWRlciA9IFRSVUUsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkKVGhlLlRvcC4xMDAuQWN0b3JzLm9mLjIwMTYgPC0gcmVhZC5jc3YoIlRoZSBUb3AgMTAwIEFjdG9ycyBvZiAyMDE2LmNzdiIsaGVhZGVyID0gVFJVRSwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFICkKCnRvcDEwMGRpcmVjdG9ycyA8LSBUb3AuMTAwLkRpcmVjdG9ycy5Xb3JraW5nLlRvZGF5JE5hbWUKdG9wMTAwZGlyZWN0b3JzIDwtIHBhc3RlKHRvcDEwMGRpcmVjdG9ycykKCnRvcDEwMGFjdG9ycyA8LSBUaGUuVG9wLjEwMC5BY3RvcnMub2YuMjAxNiROYW1lCnRvcDEwMGFjdG9ycyA8LSBwYXN0ZSh0b3AxMDBhY3RvcnMpCgojIGZyb206IGh0dHA6Ly93d3cuaW1kYi5jb20vbGlzdC9sczA3MjU5NjE3My8KI3RvcDEwMGRpcmVjdG9ycyA8LSBUb3AuMTAwLkRpcmVjdG9ycy5Xb3JraW5nLlRvZGF5JE5hbWUKI3RvcDEwMGRpcmVjdG9ycyA8LSBwYXN0ZSh0b3AxMDBkaXJlY3RvcnMpCgojdG9wMTAwYWN0b3JzIDwtIFRoZS5Ub3AuMTAwLkFjdG9ycy5vZi4yMDE2JE5hbWUKI3RvcDEwMGFjdG9ycyA8LSBwYXN0ZSh0b3AxMDBhY3RvcnMpCmBgYAoKYGBge3J9CiMgRHJvcCB1c2VsZXNzIGNvbHVtbnMKbW92aWVzIDwtIG1vdmllc1ssIShuYW1lcyhtb3ZpZXMpICVpbiUgYygnaG9tZXBhZ2UnLCdvdmVydmlldycsJ3N0YXR1cycsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAndGl0bGUnLCd0YWdsaW5lJywnb3JpZ2luYWxfdGl0bGUnKSldCmNyZWRpdHMgPC0gY3JlZGl0c1ssIShuYW1lcyhjcmVkaXRzKSAlaW4lIGMoJ3RpdGxlJykpXQoKZGltKG1vdmllcykKYGBgCgojIyMyLjEuIExldCdzIGNsZWFuIG1vdmllcyBkYXRhc2V0CgojIyMjIzIuMS4xLiBEaXN0aW5jdCBNb250aHM7IEhvbGlkYXkgbW9udGgKCmBgYHtyfQojIEV4dHJhY3QgbW9udGggb2YgcmVsZWFzZSBkYXRlOyBwdXQgaW50byBuZXcgY29sdW1uCm1vdmllcyRyZWxlYXNlX2RhdGUgPC0gYXMuUE9TSVhsdChtb3ZpZXMkcmVsZWFzZV9kYXRlLCBmb3JtYXQ9IiVZLSVtLSVkIikKbW92aWVzJHJlbGVhc2VfbW9udGggPC0gbHVicmlkYXRlOjptb250aChtb3ZpZXMkcmVsZWFzZV9kYXRlKQoKIyBGcm9tIExpbmVhciBSZWdyZXNzaW9uIHVzaW5nIGVhY2ggb2YgMTIgbW9udGhzIGFzIGR1bW15IGNhdGVnb3JpZXMsCiMgd2Ugc2F3IHRoYXQgbW9udGhzIDUsIDYsIDExIGFuZCAxMiBhcmUgaW1wb3J0YW50LAojIHdoaWxlIHRoZSByZXN0IGFyZSB1bmltcG9ydGFudC4KIyBXZSB3aWxsIGFnZ3JlZ2F0ZSB0aGlzIGFzICdob2xpZGF5IG1vbnRoJyAoYmVnaW5uaW5nIG9mIFN1bW1lcjsgYmVnaW5uaW5nIG9mIFdpbnRlcikKbW92aWVzJGhvbGlkYXlfbW9udGggPC0gMAptb3ZpZXNbd2hpY2gobW92aWVzJHJlbGVhc2VfbW9udGggJWluJSBjKDUsNiwxMSwxMikpLF1bJ2hvbGlkYXlfbW9udGgnXSA8LSAxCgojZHJvcCByZWxlYXNlIGRhdGUgYW5kIG1vbnRoCm1vdmllcyA8LSBtb3ZpZXNbICwgIShuYW1lcyhtb3ZpZXMpICVpbiUgYygncmVsZWFzZV9kYXRlJywncmVsZWFzZV9tb250aCcpKV0KYGBgCgoKIyMjIyMyLjEuMi4gTnVtYmVyIG9mIExhbmd1YWdlczsgRW5nbGlzaCAvIE5vIGVuZ2xpc2gKCmBgYHtyfQojIGNvdW50IG51bWJlciBvZiBsYW5ndWFnZXMKbW92aWVzJG51bV9sYW5nIDwtIHN0cmluZ3I6OnN0cl9jb3VudChtb3ZpZXMkc3Bva2VuX2xhbmd1YWdlcywgIlwibmFtZVwiOiIpCgptb3ZpZXMkb3JpZ2luYWxfaXNFbmdsaXNoIDwtIDAKbW92aWVzW3doaWNoKG1vdmllcyRvcmlnaW5hbF9sYW5ndWFnZSA9PSAnZW4nKSxdWydvcmlnaW5hbF9pc0VuZ2xpc2gnXSA8LSAxCgojIGRyb3Agb3JpZ2luYWwgbGFudWdhZ2U7IHNwb2tlbiBsYW5ndWFnZXMKbW92aWVzIDwtIG1vdmllc1sgLCAhKG5hbWVzKG1vdmllcykgJWluJSBjKCdvcmlnaW5hbF9sYW5ndWFnZScsICdzcG9rZW5fbGFuZ3VhZ2VzJykpXQpgYGAKCgojIyMjIzIuMS4zLiBEaXN0aW5jdCBQcm9kdWN0aW9uIENvdW50cmllcwoKYGBge3J9CiMgdHVybiBKU09OIGludG8gREYgd2l0aCBvbmUgY291bnRyeSBwZXIgcm93CmNvdW50cnlERiA9IG1vdmllcyAlPiUgZmlsdGVyKG5jaGFyKGFzLmNoYXJhY3Rlcihwcm9kdWN0aW9uX2NvdW50cmllcykpPjIpICU+JSBtdXRhdGUoanM9bGFwcGx5KGFzLmNoYXJhY3Rlcihwcm9kdWN0aW9uX2NvdW50cmllcyksZnJvbUpTT04pKSAlPiUgdW5uZXN0KGpzKSAlPiUgZHBseXI6OnNlbGVjdChpZCwgcHJvZHVjdGlvbl9jb3VudHJpZXM9bmFtZSkKCiMgU2VsZWN0IFVuaXF1ZSBDb3VudHJpZXMKIyBEaXN0aW5jdF9Db3VudHJpZXMgPSBjb3VudHJ5REYgJT4lIGRpc3RpbmN0KHByb2R1Y3Rpb25fY291bnRyaWVzKQoKZGYgPC0gYXMuZGF0YS5mcmFtZSh0YWJsZShjb3VudHJ5REYkcHJvZHVjdGlvbl9jb3VudHJpZXMpKQpoZWFkKHdpdGgoZGYsZGZbb3JkZXIoRnJlcSxkZWNyZWFzaW5nID0gVFJVRSksXSkpCmBgYAoKPiBXZSB3aWxsIG1ha2UgZHVtbXkgdmFyaWFibGVzIG9uIHRvcCA2IGZyZXF1ZW50IGNvdW50cmllcyBvbiB0aGUgbGlzdC4KCmBgYHtyfQojIFVuaXRlZCBTdGF0ZXMKbW92aWVzJGNvdW50cnlfVVNBIDwtIDAKbW92aWVzJGNvdW50cnlfVUsgPC0gMAptb3ZpZXMkY291bnRyeV9HZXJtYW55IDwtIDAKbW92aWVzJGNvdW50cnlfRnJhbmNlIDwtIDAKbW92aWVzJGNvdW50cnlfQ2FuYWRhIDwtIDAKbW92aWVzJGNvdW50cnlfQXVzdHIgPC0gMAoKZm9yIChpIGluIDE6bnJvdyhtb3ZpZXMpKSB7CiAgbm9RdW90ZXMgPC0gc3Ryc3BsaXQodG9TdHJpbmcobW92aWVzJHByb2R1Y3Rpb25fY291bnRyaWVzW2ldKSwnXCInKQogIG5vUXVvdGVzIDwtIHVubGlzdChub1F1b3RlcykKICAKICBpZiAoYW55KG5vUXVvdGVzID09ICJVbml0ZWQgU3RhdGVzIG9mIEFtZXJpY2EiKSkgewogICAgbW92aWVzJGNvdW50cnlfVVNBW2ldIDwtIDEKICB9CiAgaWYgKGFueShub1F1b3RlcyA9PSAiVW5pdGVkIEtpbmdkb20iKSkgewogICAgbW92aWVzJGNvdW50cnlfVUtbaV0gPC0gMQogIH0KICBpZiAoYW55KG5vUXVvdGVzID09ICJHZXJtYW55IikpIHsKICAgIG1vdmllcyRjb3VudHJ5X0dlcm1hbnlbaV0gPC0gMQogIH0KICBpZiAoYW55KG5vUXVvdGVzID09ICJGcmFuY2UiKSkgewogICAgbW92aWVzJGNvdW50cnlfRnJhbmNlW2ldIDwtIDEKICB9CiAgaWYgKGFueShub1F1b3RlcyA9PSAiQ2FuYWRhIikpIHsKICAgIG1vdmllcyRjb3VudHJ5X0NhbmFkYVtpXSA8LSAxCiAgfQogIGlmIChhbnkobm9RdW90ZXMgPT0gIkF1c3RyYWxpYSIpKSB7CiAgICBtb3ZpZXMkY291bnRyeV9BdXN0cltpXSA8LSAxCiAgfQp9CgojIGRyb3AgcHJvZHVjdGlvbiBjb3VudHJpZXMgY29sdW1uCm1vdmllcyA8LSBtb3ZpZXNbLCEobmFtZXMobW92aWVzKSAlaW4lIGMoJ3Byb2R1Y3Rpb25fY291bnRyaWVzJykpXQpgYGAKCmBgYHtyfQojIENoZWNrIGNvcnJlY3QgbnVtYmVycyBiZWxvdy4KIyBTaG91bGQgbWF0Y2ggd2l0aCBmcmVxdWVuY2llcyBmcm9tCiMgaGVhZCh3aXRoKGRmLGRmW29yZGVyKEZyZXEsZGVjcmVhc2luZyA9IFRSVUUpLF0pKQoKYXMuZGF0YS5mcmFtZSh0YWJsZShtb3ZpZXMkY291bnRyeV9VSykpCmBgYAoKCiMjIyMjMi4xLjQuIERpc3RpbmN0IEdlbnJlcwoKYGBge3J9CiMgdHVybiBKU09OIGludG8gREYgd2l0aCBvbmUgZ2VucmUgcGVyIHJvdwpnZW5yZURGPSBtb3ZpZXMgJT4lIGZpbHRlcihuY2hhcihhcy5jaGFyYWN0ZXIoZ2VucmVzKSk+MikgJT4lIG11dGF0ZShqcz1sYXBwbHkoYXMuY2hhcmFjdGVyKGdlbnJlcyksZnJvbUpTT04pKSAlPiUgdW5uZXN0KGpzKSAlPiUgZHBseXI6OnNlbGVjdChpZCwgZ2VucmU9bmFtZSkKIyBzbGljZShnZW5yZWRmKSAjIFZpZXcgdGhlIG5ld2x5IGNyZWF0ZWQgREYKCiMgU2VsZWN0IFVuaXF1ZSBHZW5yZXMKRGlzdGluY3RfR2VucmVzID0gdW5pcXVlKGdlbnJlREYkZ2VucmUpCiAgCiMgTWFrZSBkdW1teSB2YXJpYWJsZSBmb3IgZWFjaCBnZW5yZQpmb3IgKG5hbWUgaW4gRGlzdGluY3RfR2VucmVzKSB7CiAgCiAgIyBwcmVwZW5kICdnZW5yZScgdG8gYWxsIGdlbnJlIG5hbWVzCiAgY29sX05hbWUgPC0gcGFzdGUoJ2dlbnJlJywgbmFtZSwgc2VwPSdfJykKICBtb3ZpZXNbY29sX05hbWVdIDwtIDAKCiAgbW92aWVzW2dyZXAobmFtZSxtb3ZpZXMkZ2VucmVzKSxdW2NvbF9OYW1lXSA8LSAxCn0KCiMgRHJvcCByYXcganNvbiBnZW5yZXMKbW92aWVzIDwtIG1vdmllc1ssIShuYW1lcyhtb3ZpZXMpICVpbiUgYygnZ2VucmVzJykpXQpgYGAKCgojIyMjIzIuMS41LiBEaXN0aW5jdCBLZXl3b3JkcwoKYGBge3J9CiMgdHVybiBKU09OIGludG8gREYgd2l0aCBvbmUga2V5d29yZCBwZXIgcm93CmtleXdvcmRERj1tb3ZpZXMgJT4lIGZpbHRlcihuY2hhcihhcy5jaGFyYWN0ZXIoa2V5d29yZHMpKT4yKSAlPiUgbXV0YXRlKGpzPWxhcHBseShhcy5jaGFyYWN0ZXIoa2V5d29yZHMpLGZyb21KU09OKSkgJT4lIHVubmVzdChqcykgJT4lIGRwbHlyOjpzZWxlY3QoaWQsa2V5d29yZHM9bmFtZSkKCiMgU2VsZWN0IFVuaXF1ZSBHZW5yZXMKIyBEaXN0aW5jdF9LZXl3b3JkcyA9IHVuaXF1ZShrZXl3b3JkREYka2V5d29yZHMpCiMgbGVuZ3RoKERpc3RpbmN0X0tleXdvcmRzKQoKZGYgPC0gYXMuZGF0YS5mcmFtZSh0YWJsZShrZXl3b3JkREYka2V5d29yZHMpKQp3aXRoKGRmLGRmW29yZGVyKEZyZXEsZGVjcmVhc2luZyA9IFRSVUUpLF0pCmBgYAoKIyMjIyMyLjEuNi4gRGlzdGluY3QgUHJvZHVjdGlvbiBDb21wYW5pZXMKCmBgYHtyfQojIHR1cm4gSlNPTiBpbnRvIERGIHdpdGggb25lIGNvbXBhbnkgcGVyIHJvdwpjb21wYW55REY9bW92aWVzICU+JSBmaWx0ZXIobmNoYXIoYXMuY2hhcmFjdGVyKHByb2R1Y3Rpb25fY29tcGFuaWVzKSk+MikgJT4lIG11dGF0ZShqcz1sYXBwbHkoYXMuY2hhcmFjdGVyKHByb2R1Y3Rpb25fY29tcGFuaWVzKSxmcm9tSlNPTikpICU+JSB1bm5lc3QoanMpICU+JSBkcGx5cjo6c2VsZWN0KGlkLHByb2R1Y3Rpb25fY29tcGFuaWVzPW5hbWUpCgojIFNlbGVjdCBVbmlxdWUgR2VucmVzCiMgRGlzdGluY3RfQ29tcGFuaWVzID0gY29tcGFueURGICU+JSBkaXN0aW5jdChwcm9kdWN0aW9uX2NvbXBhbmllcykKCmRmIDwtIGFzLmRhdGEuZnJhbWUodGFibGUoY29tcGFueURGJHByb2R1Y3Rpb25fY29tcGFuaWVzKSkKd2l0aChkZixkZltvcmRlcihGcmVxLGRlY3JlYXNpbmcgPSBUUlVFKSxdKQpgYGAKCiMjIzIuMi4gTGV0cyBDbGVhbiBDcmVkaXRzIERhdGFzZXQKCmBgYHtyfQojIFNwbGl0IEpTT04gZm9yIGNhc3QKYWxsX2Nhc3QgPC0gY3JlZGl0cyAlPiUgICAgICAjIHN0YXJ0IHdpdGggdGhlIHJhdyB0aWJibGUgCiAgZmlsdGVyKG5jaGFyKGFzLmNoYXJhY3RlcihjYXN0KSkgPiAyKSAlPiUgICAgICAgICMgZmlsdGVyIG91dCBtb3ZpZXMgd2l0aCBlbXB0eSBjYXN0IAogIG11dGF0ZSggICAgICAgICAgICAgICAgICAgICAgICAgICMgICAgICAgCiAgICBqc19jYXN0ICA9ICBsYXBwbHkoYXMuY2hhcmFjdGVyKGNhc3QpLCBmcm9tSlNPTikgICMgdHVybiB0aGUgSlNPTiBpbnRvIGEgbGlzdAogICkgICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgICMKICB1bm5lc3QoanNfY2FzdCkgJT4lIGRwbHlyOjpzZWxlY3QobW92aWVfaWQsIGNhc3RfaWQsIGNoYXJhY3RlciwgY3JlZGl0X2lkLCBnZW5kZXIsIGlkLCBuYW1lID0gbmFtZSwgb3JkZXIpCgojIFNwbGl0IEpTT04gZm9yIGNyZXcKYWxsX2NyZXcgPC0gY3JlZGl0cyAlPiUgICAgICAjIHN0YXJ0IHdpdGggdGhlIHJhdyB0aWJibGUgCiAgZmlsdGVyKG5jaGFyKGFzLmNoYXJhY3RlcihjcmV3KSkgPiAyKSAlPiUgICAgICAgICMgZmlsdGVyIG91dCBtb3ZpZXMgd2l0aCBlbXB0eSBjcmV3ICAKICBtdXRhdGUoICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICBqc19jcmV3ICA9ICBsYXBwbHkoYXMuY2hhcmFjdGVyKGNyZXcpLCBmcm9tSlNPTikgICMgdHVybiB0aGUgSlNPTiBpbnRvIGEgbGlzdAogICkgICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgICMKICB1bm5lc3QoanNfY3JldykgJT4lIGRwbHlyOjpzZWxlY3QobW92aWVfaWQsIGNyZWRpdF9pZCwgZGVwYXJ0bWVudCwgZ2VuZGVyLCBpZCwgam9iLCBuYW1lID0gbmFtZSkKYGBgCgojIyMjIzIuMi4xLiBDaGVjayBpZiBEaXJlY3RvciBpcyBpbiB0b3AgMTAwCgpgYGB7cn0KIyBMaXN0IGFsbCBtb3ZpZSBkaXJlY3RvcnMKZGlyZWN0b3JzIDwtIGZpbHRlcihhbGxfY3Jld1ssYygnbW92aWVfaWQnLCdqb2InLCduYW1lJyldLCBhbGxfY3JldyRqb2IgPT0gJ0RpcmVjdG9yJykKZGlyZWN0b3JzCmBgYAoKYGBge3J9CiMgTGlzdCBvZiB0b3AgMTAwIGRpcmVjdG9ycwp3aXRoKFRvcC4xMDAuRGlyZWN0b3JzLldvcmtpbmcuVG9kYXksIFRvcC4xMDAuRGlyZWN0b3JzLldvcmtpbmcuVG9kYXlbb3JkZXIoTmFtZSksXSlbJ05hbWUnXQpgYGAKCj4gTWFrZSBhIGNvbHVtbiB0byByZXByZXNlbnQgaWYgdGhlIG1vdmllIGhhcyB0b3AgZGlyZWN0b3IuCgpgYGB7cn0KIyBOZXcgYmluYXJ5IGNvbHVtbiB0byBhZ2dyZWdhdGUgYnkgZGlyZWN0b3IgZmFtZQpjcmVkaXRzJHRvcERpcmVjdG9yIDwtIDAKCiMgSWYgZGlyZWN0b3IgaW4gdG9wIDEwMCBkaXJlY3RvcnMgbGlzdCwgY2hhbmdlIGZyb20gMCB0byAxCmZvciAoaSBpbiAxOm5yb3coZGlyZWN0b3JzKSkgewogIGlmIChkaXJlY3RvcnMkbmFtZVtpXSAlaW4lIHRvcDEwMGRpcmVjdG9ycykgeyAgICAgICAjIG1hdGNoIGRpcmVjdG9yIGFuZCBjcmVkaXRzIERGcyBieSBJRAogICAgY3JlZGl0c1t3aGljaChjcmVkaXRzJG1vdmllX2lkID09IGRpcmVjdG9ycyRtb3ZpZV9pZFtpXSksXVsndG9wRGlyZWN0b3InXSA8LSAxCiAgfQp9CgojIERyb3AgY3JldyBjb2x1bW4KY3JlZGl0cyA8LSBjcmVkaXRzWywhKG5hbWVzKGNyZWRpdHMpICVpbiUgYygnY3JldycpKV0KY3JlZGl0cwpgYGAKCmBgYHtyfQojIENoZWNrIHZhbHVlIGZyZXF1ZW5jaWVzCmFzLmRhdGEuZnJhbWUodGFibGUoY3JlZGl0cyR0b3BEaXJlY3RvcikpCmBgYAoKIyMjIyMyLjIuMi4gQ2hlY2sgaWYgYW55IHRvcCAxMDAgQWN0b3JzCgpgYGB7cn0KIyBMaXN0IGFsbCBtb3ZpZSBkaXJlY3RvcnMKYWN0b3JzIDwtIGZpbHRlcihhbGxfY2FzdFssYygnbW92aWVfaWQnLCduYW1lJyldKQphY3RvcnMKYGBgCgo+IE1ha2UgYSBjb2x1bW4gdG8gcmVwcmVzZW50IGlmIHRoZSBtb3ZpZSBoYXMgdG9wIGFjdG9yLgoKYGBge3J9CiMgTmV3IGJpbmFyeSBjb2x1bW4gdG8gYWdncmVnYXRlIGJ5IGRpcmVjdG9yIGZhbWUKY3JlZGl0cyR0b3BBY3RvciA8LSAwCgojIElmIEFjdG9ycyBpbiB0b3AgMTAwIGFjdG9ycyBsaXN0LCBjaGFuZ2UgZnJvbSAwIHRvIDEKZm9yIChpIGluIDE6bnJvdyhhY3RvcnMpKSB7CiAgaWYgKGFjdG9ycyRuYW1lW2ldICVpbiUgdG9wMTAwYWN0b3JzKSB7ICAgICAgICMgbWF0Y2ggYWN0b3JzIGFuZCBjcmVkaXRzIERGcyBieSBJRAogICAgY3JlZGl0c1t3aGljaChjcmVkaXRzJG1vdmllX2lkID09IGFjdG9ycyRtb3ZpZV9pZFtpXSksXVsndG9wQWN0b3InXSA8LSAxCiAgfQp9CgpoZWFkKGNyZWRpdHMkdG9wQWN0b3IpCmBgYAoKCiMjIyMjMi4yLjMuIEF2ZXJhZ2UgR2VuZGVycyBvZiBDYXN0CgpgYGB7cn0KIyAwID0gdW5rbm93bgojIDEgPSBGZW1hbGUKIyAyID0gTWFsZQoKQ2FzdEdlbmRlcnMgPC0gZmlsdGVyKGFsbF9jYXN0WyxjKCdtb3ZpZV9pZCcsJ2dlbmRlcicpXSwKICAgICAgICAgICAgICAgICAgYWxsX2Nhc3QkZ2VuZGVyICE9IDApCkNhc3RHZW5kZXJzIDwtIGFnZ3JlZ2F0ZShDYXN0R2VuZGVycyRnZW5kZXIsIGJ5PWxpc3QoQ2FzdEdlbmRlcnMkbW92aWVfaWQpLCBGVU49bWVhbikKCiMgQXBwZW5kIEF2ZyBHZW5kZXJzIHRvIENyZWRpdHMKY3JlZGl0cyRDYXN0R2VuZGVyQVZHIDwtIDEuNQoKZm9yIChJRCBpbiBDYXN0R2VuZGVycyRHcm91cC4xKXsKICBjcmVkaXRzW3doaWNoKGNyZWRpdHMkbW92aWVfaWQgPT0gSUQpLF1bJ0Nhc3RHZW5kZXJBVkcnXSA8LSAKICAgIENhc3RHZW5kZXJzW3doaWNoKENhc3RHZW5kZXJzJEdyb3VwLjEgPT0gSUQpLF1bJ3gnXQp9CgojIERyb3AgY2FzdCBjb2x1bW4KY3JlZGl0cyA8LSBjcmVkaXRzWywhKG5hbWVzKGNyZWRpdHMpICVpbiUgYygnY2FzdCcpKV0KYGBgCgojIyMjIzIuMi40LiBNZXJnZSBtb3ZpZSBhbmQgY3JlZGl0IGRhdGFzZXQKCmBgYHtyfQpUTURCX0NsZWFuZWQgPC0gbGVmdF9qb2luKGNyZWRpdHMsbW92aWVzLCBieSA9IGMoJ21vdmllX2lkJyA9ICdpZCcpKQpjb2xuYW1lcyhUTURCX0NsZWFuZWQpCmRpbShUTURCX0NsZWFuZWQpCmBgYAoKIyMjIyMyLjIuNS4gUmVtb3ZlIGtleXdvcmRzIGFuZCBwcm9kdWN0aW9uX2NvbXBhbmllcyBmcm9tIHRoZSBkYXRhc2V0IGFzIHRoZXkgYXJlIGh1Z2UgYW5kIGlzIGxlc3Mgc2lnbmlmaWNhbnQgdG8gcHJlZGljdCB0aGUgcmV2ZW51ZQoKYGBge3J9CiMgcmVtb3ZlIHRoaXMgYWZ0ZXIgY2xlYW5pbmcga2V5d29yZHMgYW5kIGNvbXBhbmllcyBwcm9wZXJseQpUTURCX0NsZWFuZWQgPC0gVE1EQl9DbGVhbmVkWywhKG5hbWVzKFRNREJfQ2xlYW5lZCkgJWluJSBjKCdrZXl3b3JkcycsJ3Byb2R1Y3Rpb25fY29tcGFuaWVzJykpXQoKVE1EQl9DbGVhbmVkIDwtIG5hLm9taXQoVE1EQl9DbGVhbmVkKQpkaW0oVE1EQl9DbGVhbmVkKQpgYGAKCmBgYHtyfQp3cml0ZS5jc3YoVE1EQl9DbGVhbmVkLCBmaWxlPSdUTURCX0NsZWFuZWQuY3N2JykKYGBgCgoKYGBge3J9CnRtZGIgPC0gcmVhZC5jc3YoIlRNREJfQ2xlYW5lZC5jc3YiLCBoZWFkZXIgPSBUUlVFLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpCnRtZGIxPC1zdWJzZXQodG1kYixzZWxlY3Q9LWMoWCxtb3ZpZV9pZCkpCmhlYWQodG1kYjEpCmBgYAoKCiMjIyMjMi4yLjYuIENvbmNpc2UgbnVtX2xhbmcgdG8gNCBzcG9rZW4gbGFuZ3VhZ2VzIGluIGEgbW92aWUgZm9yIGJldHRlciBhbmFseXNpcwoKYGBge3J9CnRtZGIxIDwtIHRtZGIxICU+JSBtdXRhdGUobnVtX2xhbmcgPSBpZmVsc2UobnVtX2xhbmcgPj0gNSwgNCwgbnVtX2xhbmcpKQoKZGYgPC0gYXMuZGF0YS5mcmFtZSh0YWJsZSh0bWRiMSRudW1fbGFuZykpCmhlYWQod2l0aChkZixkZltvcmRlcihGcmVxLGRlY3JlYXNpbmcgPSBUUlVFKSxdKSkKYGBgCgoKYGBge3J9CmhlYWQodG1kYjEpCmBgYAoKIyMjIyMyLjIuNy4gQ29udmVydGluZyBiaW5hcnkgY29sdW1ucyB0byBjYXRlZ29yaWNhbCB2YXJpYWJsZXMKCmBgYHtyfQp0bWRiMSRvcmlnaW5hbF9pc0VuZ2xpc2g8LSBhcy5mYWN0b3IodG1kYjEkb3JpZ2luYWxfaXNFbmdsaXNoKQp0bWRiMSRob2xpZGF5X21vbnRoPC0gYXMuZmFjdG9yKHRtZGIxJGhvbGlkYXlfbW9udGgpCnRtZGIxJHRvcERpcmVjdG9yPC0gYXMuZmFjdG9yKHRtZGIxJHRvcERpcmVjdG9yKQp0bWRiMSR0b3BBY3RvciA8LSBhcy5mYWN0b3IodG1kYjEkdG9wQWN0b3IpCnRtZGIxJG51bV9sYW5nIDwtIGFzLmZhY3Rvcih0bWRiMSRudW1fbGFuZykKdG1kYjEkZ2VucmVfQ3JpbWUgPC0gYXMuZmFjdG9yKHRtZGIxJGdlbnJlX0NyaW1lKQp0bWRiMSRnZW5yZV9TY2llbmNlLkZpY3Rpb24gPC0gYXMuZmFjdG9yKHRtZGIxJGdlbnJlX1NjaWVuY2UuRmljdGlvbikKdG1kYjEkZ2VucmVfRmFtaWx5PC0gYXMuZmFjdG9yKHRtZGIxJGdlbnJlX0ZhbWlseSkKdG1kYjEkZ2VucmVfQW5pbWF0aW9uIDwtIGFzLmZhY3Rvcih0bWRiMSRnZW5yZV9BbmltYXRpb24pCnRtZGIxJGdlbnJlX1dlc3Rlcm4gPC0gYXMuZmFjdG9yKHRtZGIxJGdlbnJlX1dlc3Rlcm4pCnRtZGIxJGdlbnJlX0FkdmVudHVyZSA8LSBhcy5mYWN0b3IodG1kYjEkZ2VucmVfQWR2ZW50dXJlKQp0bWRiMSRnZW5yZV9Sb21hbmNlIDwtIGFzLmZhY3Rvcih0bWRiMSRnZW5yZV9Sb21hbmNlKQp0bWRiMSRnZW5yZV9EcmFtYSA8LSBhcy5mYWN0b3IodG1kYjEkZ2VucmVfRHJhbWEpCnRtZGIxJGdlbnJlX0FjdGlvbiA8LSBhcy5mYWN0b3IodG1kYjEkZ2VucmVfQWN0aW9uKQp0bWRiMSRnZW5yZV9Eb2N1bWVudGFyeSA8LSBhcy5mYWN0b3IodG1kYjEkZ2VucmVfRG9jdW1lbnRhcnkpCnRtZGIxJGdlbnJlX0NvbWVkeSA8LSBhcy5mYWN0b3IodG1kYjEkZ2VucmVfQ29tZWR5KQp0bWRiMSRnZW5yZV9Ib3Jyb3IgPC0gYXMuZmFjdG9yKHRtZGIxJGdlbnJlX0hvcnJvcikKdG1kYjEkZ2VucmVfTXlzdGVyeSA8LSBhcy5mYWN0b3IodG1kYjEkZ2VucmVfTXlzdGVyeSkKdG1kYjEkZ2VucmVfSGlzdG9yeSA8LSBhcy5mYWN0b3IodG1kYjEkZ2VucmVfSGlzdG9yeSkKdG1kYjEkZ2VucmVfTXVzaWMgPC0gYXMuZmFjdG9yKHRtZGIxJGdlbnJlX0hpc3RvcnkpCnRtZGIxJGdlbnJlX0ZvcmVpZ24gPC0gYXMuZmFjdG9yKHRtZGIxJGdlbnJlX0ZvcmVpZ24pCnRtZGIxJGdlbnJlX1RWLk1vdmllIDwtIGFzLmZhY3Rvcih0bWRiMSRnZW5yZV9UVi5Nb3ZpZSkKdG1kYjEkZ2VucmVfRmFudGFzeSA8LSBhcy5mYWN0b3IodG1kYjEkZ2VucmVfRmFudGFzeSkKdG1kYjEkZ2VucmVfVGhyaWxsZXIgPC0gYXMuZmFjdG9yKHRtZGIxJGdlbnJlX1RocmlsbGVyKQp0bWRiMSRnZW5yZV9XYXIgPC0gYXMuZmFjdG9yKHRtZGIxJGdlbnJlX1dhcikKdG1kYjEkY291bnRyeV9VU0EgPC0gYXMuZmFjdG9yKHRtZGIxJGNvdW50cnlfVVNBKQp0bWRiMSRjb3VudHJ5X1VLIDwtIGFzLmZhY3Rvcih0bWRiMSRjb3VudHJ5X1VLKQp0bWRiMSRjb3VudHJ5X0ZyYW5jZSA8LSBhcy5mYWN0b3IodG1kYjEkY291bnRyeV9GcmFuY2UpCnRtZGIxJGNvdW50cnlfR2VybWFueSA8LSBhcy5mYWN0b3IodG1kYjEkY291bnRyeV9HZXJtYW55KQp0bWRiMSRjb3VudHJ5X0NhbmFkYSA8LSBhcy5mYWN0b3IodG1kYjEkY291bnRyeV9DYW5hZGEpCnRtZGIxJGNvdW50cnlfQXVzdHIgPC0gYXMuZmFjdG9yKHRtZGIxJGNvdW50cnlfQXVzdHIpCiN0bWRiMiRyZXZlbnVlIDwtIGxvZyh0bWRiMiRyZXZlbnVlKQojdG1kYjIkYnVkZ2V0IDwtIGxvZyh0bWRiMiRidWRnZXQpCnN0cih0bWRiMSkKYGBgCgojIyMjIzIuMi44LiBTY2FsZSB0aGUgZGF0YSBmb3IgdW5pZm9ybWl0eSBpbiBudW1lcmljYWwgdmFsdWVzIG9mIHRoZSBkYXRhc2V0CgpgYGB7cn0KbGlicmFyeShNQVNTKQppbmQgPC0gc2FwcGx5KHRtZGIxLCBpcy5udW1lcmljKQp0bWRiMVtpbmRdIDwtIGxhcHBseSh0bWRiMVtpbmRdLCBzY2FsZSkKc3RyKHRtZGIxKQpgYGAKCiMjIyMjMi4yLjkuIExldCdzIGV4cGxvcmUgZGF0YSB0byBsb29rIGF0IG91ciBudW1lcmljYWwgdmFsdWVzCgpgYGB7cn0Kc3VtbWFyeSh0bWRiMSkKYGBgCgojIyMjIzIuMi4xMC4gSW52ZXN0aWdhdGUgZm9yIG1pc3NpbmcgdmFsdWVzCgpgYGB7cn0Kc2FwcGx5KHRtZGIxLGZ1bmN0aW9uKHgpIHN1bShpcy5uYSh4KSkpICMgbnVtYmVyIG9mIG1pc3NpbmcgdmFsdWVzIGZvciBlYWNoIHZhcmlhYmxlIApgYGAKCiMjMy4gUHJlZGljdGl2ZSBtb2RlbGluZwpNb2RlbHMgdXNlZCB0byBidWlsZCB0aGUgcHJlZGljdGl2ZSBtb2RlbC0gCipMaW5lYXIgcmVncmVzc2lvbiAKKkxhc3NvIHJlZ3Jlc3Npb24gCipSaWRnZSBSZWdyZXNzaW9uIAoqUmVncmVzc2lvbiBUcmVlcyAKKlJhbmRvbSBGb3Jlc3QgCgoKIyMjIyMgUGxvdCBzY2F0dGVycGxvdCBvbiBudW1lcmljYWwgdmFyaWFibGVzCgpgYGB7cn0Kc2NhdHRlcnBsb3RNYXRyaXgofnRtZGIxJHJldmVudWUrdG1kYjEkYnVkZ2V0K3RtZGIxJHBvcHVsYXJpdHkrdG1kYjEkcnVudGltZSt0bWRiMSRDYXN0R2VuZGVyQVZHK3RtZGIxJHZvdGVfYXZlcmFnZSt0bWRiMSR2b3RlX2NvdW50KQpgYGAKCmBgYHtyfQpsaWJyYXJ5KGNvcnJwbG90KQpudW1lcmljX2NvbCA8LSBzYXBwbHkodG1kYjEsIGlzLm51bWVyaWMpCnRtZGIxX251bWVyaWM8LSB0bWRiMVssbnVtZXJpY19jb2xdCkNvcnJlbGF0aW9uPC1jb3IodG1kYjFfbnVtZXJpYykKY29ycnBsb3QoQ29ycmVsYXRpb24sIG1ldGhvZCA9ICJjb2xvciIpCmBgYAoKIyMjIzMuMS4gTGluZWFyIFJlZ3Jlc3Npb24KCiMjIyMjMy4xLjEuIFBlcmZyb20gc3RlcHdpc2UgcmVncmVzc2lvbiB0byBpZGVudGlmeSB0aGUgdG9wIHByZWRpY3RvcnMKCmBgYHtyfQpudWxsX21vZGVsPC1sbShyZXZlbnVlfjEsZGF0YT10bWRiMSkKCmZ1bGxfbW9kZWw8LWxtKHJldmVudWV+LixkYXRhPXRtZGIxKQoKc3RlcChudWxsX21vZGVsLCBzY29wZSA9IGxpc3QobG93ZXIgPSBudWxsX21vZGVsLCB1cHBlciA9IGZ1bGxfbW9kZWwpLCBkaXJlY3Rpb24gPSAiYm90aCIpCmBgYAoKYGBge3J9CnN1bW1hcnkoZnVsbF9tb2RlbCkKYGBgCgpgYGB7cn0KZGZsbTEgPC0gbG0ocmV2ZW51ZSB+IHZvdGVfY291bnQgKyBidWRnZXQgKyBnZW5yZV9GYW1pbHkgKyBob2xpZGF5X21vbnRoICsgCiAgICB0b3BEaXJlY3RvciArIGdlbnJlX1NjaWVuY2UuRmljdGlvbiArIHRvcEFjdG9yICsgZ2VucmVfQ3JpbWUgKyAKICAgIGNvdW50cnlfR2VybWFueSArIHBvcHVsYXJpdHkgKyBnZW5yZV9BbmltYXRpb24gKyBnZW5yZV9XZXN0ZXJuICsgCiAgICBnZW5yZV9BZHZlbnR1cmUgKyBnZW5yZV9Sb21hbmNlICsgZ2VucmVfRHJhbWEgKyBnZW5yZV9BY3Rpb24gKyAKICAgIHJ1bnRpbWUsIGRhdGEgPSB0bWRiMSkKc3VtbWFyeShkZmxtMSkKYGBgCgoKYGBge3J9CmRmbG0yIDwtIGxtKHJldmVudWUgfiBidWRnZXQgKyBydW50aW1lICsgdm90ZV9jb3VudCArIGdlbnJlX0NyaW1lICsgZ2VucmVfRHJhbWEgKyBnZW5yZV9BbmltYXRpb24gKyBnZW5yZV9GYW1pbHkgKyArIGhvbGlkYXlfbW9udGg6dm90ZV9jb3VudCArIHRvcEFjdG9yOnZvdGVfY291bnQgKyB0b3BEaXJlY3Rvcjp2b3RlX2NvdW50ICsgdG9wRGlyZWN0b3I6YnVkZ2V0ICsgZ2VucmVfQWN0aW9uOnZvdGVfY291bnQgKyBnZW5yZV9BZHZlbnR1cmU6dm90ZV9jb3VudCArIGdlbnJlX0NyaW1lOnZvdGVfY291bnQgKyBnZW5yZV9Sb21hbmNlOnZvdGVfY291bnQgKyBnZW5yZV9TY2llbmNlLkZpY3Rpb246dm90ZV9jb3VudCArIGdlbnJlX1dlc3Rlcm46dm90ZV9jb3VudCArIGhvbGlkYXlfbW9udGggKyB2b3RlX2F2ZXJhZ2U6dm90ZV9jb3VudCArIGJ1ZGdldDp2b3RlX2NvdW50ICsgcnVudGltZTp2b3RlX2NvdW50ICsgdm90ZV9jb3VudDpwb3B1bGFyaXR5ICwgZGF0YSA9IHRtZGIxKQpzdW1tYXJ5KGRmbG0yKQpgYGAKCgpgYGB7cn0KcGFyKG1mcm93PWMoMiwyKSkgIyBpbml0IDQgY2hhcnRzIGluIDEgcGFuZWwKcGxvdChkZmxtMikKYGBgCgpgYGB7cn0KcGFyKG1mcm93ID0gYygxLCAyKSkKaGlzdChkZmxtMSRyZXNpZHVhbHMpCnFxbm9ybShkZmxtMSRyZXNpZHVhbHMpCnFxbGluZShkZmxtMSRyZXNpZHVhbHMpCmBgYAoKYGBge3J9CmxpYnJhcnkoZ3JpZCkKbGlicmFyeShncmlkRXh0cmEpCmdfYnVkZ2V0IDwtIGdncGxvdChkYXRhPU5VTEwsIGFlcyh4PXRtZGIxJGJ1ZGdldCwgeT1kZmxtMiRyZXNpZHVhbHMpKSArCiAgZ2VvbV9wb2ludCgpCmdfdm90ZWNvdW50IDwtIGdncGxvdChkYXRhPU5VTEwsIGFlcyh4PXRtZGIxJHZvdGVfY291bnQsIHk9ZGZsbTIkcmVzaWR1YWxzKSkgKwogIGdlb21fcG9pbnQoKQpnX3BvcHVsYXJpdHkgPC0gZ2dwbG90KGRhdGE9TlVMTCwgYWVzKHg9dG1kYjEkcG9wdWxhcml0eSwgeT1kZmxtMiRyZXNpZHVhbHMpKSArCiAgZ2VvbV9wb2ludCgpCmdyaWQuYXJyYW5nZShnX2J1ZGdldCwgZ192b3RlY291bnQsZ19wb3B1bGFyaXR5KQpgYGAKCiMjIyMjMy4xLjIuIFBlcmZyb20gc3RlcHdpc2UgcmVncmVzc2lvbiB0byBpZGVudGlmeSB0aGUgdG9wIHByZWRpY3RvcnMgVG8gZXhwbG9yZSB0aGUgZGF0YSwgY3JlYXRlZCBzZXZlcmFsIGhpc3RvZ3JhbXMgb2YgcnVudGltZSwgdm90ZV9jb3VudCwgdm90ZV9hdmVyYWdlLCBwb3B1bGFyaXR5IHRvIHVuZGVyc3RhbmQgdGhlaXIgZGlzdHJpYnV0aW9uCgpgYGB7cn0Kb3B0aW9ucyhyZXByLnBsb3Qud2lkdGg9NiwgcmVwci5wbG90LmhlaWdodD00KSAKZzE8LWdncGxvdCh0bWRiMSxhZXMoeD10bWRiMSRydW50aW1lKSkrZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9NSxhZXMoeT0uLmRlbnNpdHkuLiksZmlsbD0iZ3JlZW40IikKZzI8LWdncGxvdCh0bWRiMSxhZXMoeD10bWRiMSR2b3RlX2NvdW50KSkrZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9NTAsYWVzKHk9Li5jb3VudC4uKSxmaWxsPSJyZWQiKQpnMzwtZ2dwbG90KHRtZGIxLGFlcyh4PXRtZGIxJHBvcHVsYXJpdHkpKStnZW9tX2hpc3RvZ3JhbShiaW53aWR0aD0xLGFlcyh5PS4uY291bnQuLiksZmlsbD0iZ3JlZW40IikKZzQ8LWdncGxvdCh0bWRiMSxhZXMoeD10bWRiMSR2b3RlX2F2ZXJhZ2UpKStnZW9tX2hpc3RvZ3JhbShiaW53aWR0aD01LGFlcyh5PS4uY291bnQuLiksZmlsbD0icmVkIikKZ3JpZC5hcnJhbmdlKGcxLGcyLGczLGc0LG5yb3c9MixuY29sPTIpCmBgYAoKIyMjIyMzLjEuMy4gU3BsaXQgZGF0YXNldCB0byBmaW5kIHRlc3QgUi1zcXVhcmVkIGZvciBsaW5lYXIgbW9kZWwKCmBgYHtyfQpzZXQuc2VlZCgxKSAgICAjIGZvciByZXByb2R1Y2libGUgZXhhbXBsZQoKIyB0cmFpbmluZyBzZXQKdHJhaW4gPC0gc2FtcGxlKDE6bnJvdyh0bWRiMSksMC43MCpucm93KHRtZGIxKSkgICAjIHJhbmRvbSBzYW1wbGUgb2YgNzUlIG9mIGRhdGEKCmZpdCA8LSBsbShyZXZlbnVlIH4gKyBidWRnZXQgKyBydW50aW1lICsgdm90ZV9jb3VudCArIGdlbnJlX0NyaW1lICsgZ2VucmVfRHJhbWEgKyBnZW5yZV9BbmltYXRpb24gKyBnZW5yZV9GYW1pbHkgKyArIGhvbGlkYXlfbW9udGg6dm90ZV9jb3VudCArIHRvcEFjdG9yOnZvdGVfY291bnQgKyB0b3BEaXJlY3Rvcjp2b3RlX2NvdW50ICsgdG9wRGlyZWN0b3I6YnVkZ2V0ICsgZ2VucmVfQWN0aW9uOnZvdGVfY291bnQgKyBnZW5yZV9BZHZlbnR1cmU6dm90ZV9jb3VudCArIGdlbnJlX0NyaW1lOnZvdGVfY291bnQgKyBnZW5yZV9Sb21hbmNlOnZvdGVfY291bnQgKyBnZW5yZV9TY2llbmNlLkZpY3Rpb246dm90ZV9jb3VudCArIGdlbnJlX1dlc3Rlcm46dm90ZV9jb3VudCArIGhvbGlkYXlfbW9udGggKyB2b3RlX2F2ZXJhZ2U6dm90ZV9jb3VudCArIGJ1ZGdldDp2b3RlX2NvdW50ICsgcnVudGltZTp2b3RlX2NvdW50ICsgdm90ZV9jb3VudDpwb3B1bGFyaXR5ICwgZGF0YSA9IHRtZGIxW3RyYWluLF0pCmBgYAoKYGBge3J9CnRlc3QgPC0gLXRyYWluCnRlc3QucHJlZCA8LSBwcmVkaWN0KGZpdCxuZXdkYXRhPXRtZGIxW3Rlc3QsXSkKdGVzdC55ICAgIDwtIHRtZGIxW3Rlc3QsXSRyZXZlbnVlCgpTUy50b3RhbCAgICAgIDwtIHN1bSgodGVzdC55IC0gbWVhbih0ZXN0LnkpKV4yKQpTUy5yZXNpZHVhbCAgIDwtIHN1bSgodGVzdC55IC0gdGVzdC5wcmVkKV4yKQpTUy5yZWdyZXNzaW9uIDwtIHN1bSgodGVzdC5wcmVkIC0gbWVhbih0ZXN0LnkpKV4yKQpTUy50b3RhbCAtIChTUy5yZWdyZXNzaW9uK1NTLnJlc2lkdWFsKQojIFsxXSA4OTU4ODkwCgojIE5PVCB0aGUgZnJhY3Rpb24gb2YgdmFyaWFiaWxpdHkgZXhwbGFpbmVkIGJ5IHRoZSBtb2RlbAp0ZXN0LnJzcSA8LSAxIC0gU1MucmVzaWR1YWwvU1MudG90YWwgIAp0ZXN0LnJzcQpgYGAKCiMjIyMzLjIuIFJpZGdlIFJlZ3Jlc3Npb24KCmBgYHtyfQp4IDwtIG1vZGVsLm1hdHJpeChyZXZlbnVlIH4gYnVkZ2V0ICsgcnVudGltZSArIHZvdGVfY291bnQgKyBnZW5yZV9DcmltZSArIGdlbnJlX0RyYW1hICsgZ2VucmVfQW5pbWF0aW9uICsgZ2VucmVfRmFtaWx5ICsgKyBob2xpZGF5X21vbnRoOnZvdGVfY291bnQgKyB0b3BBY3Rvcjp2b3RlX2NvdW50ICsgdG9wRGlyZWN0b3I6dm90ZV9jb3VudCArIHRvcERpcmVjdG9yOmJ1ZGdldCArIGdlbnJlX0FjdGlvbjp2b3RlX2NvdW50ICsgZ2VucmVfQWR2ZW50dXJlOnZvdGVfY291bnQgKyBnZW5yZV9DcmltZTp2b3RlX2NvdW50ICsgZ2VucmVfUm9tYW5jZTp2b3RlX2NvdW50ICsgZ2VucmVfU2NpZW5jZS5GaWN0aW9uOnZvdGVfY291bnQgKyBnZW5yZV9XZXN0ZXJuOnZvdGVfY291bnQgKyBob2xpZGF5X21vbnRoICsgdm90ZV9hdmVyYWdlOnZvdGVfY291bnQgKyBidWRnZXQ6dm90ZV9jb3VudCArIHJ1bnRpbWU6dm90ZV9jb3VudCArIHZvdGVfY291bnQ6cG9wdWxhcml0eSwgdG1kYjEpCnkgPSB0bWRiMSAlPiUKICBkcGx5cjo6c2VsZWN0KHJldmVudWUpICU+JQogIHVubGlzdCgpICU+JQogIGFzLm51bWVyaWMoKQpgYGAKCiMjIyMjMy4yLjEuIFNwbGl0IGRhdGFzZXQgdG8gdHJhaW4gYW5kIHRlc3QKCmBgYHtyfQpzZXQuc2VlZCgxKQoKdHJhaW4gPSB0bWRiMSAlPiUKICBzYW1wbGVfZnJhYygwLjcpCgp0ZXN0ID0gdG1kYjEgJT4lCiAgc2V0ZGlmZih0cmFpbikKCnhfdHJhaW4gPSBtb2RlbC5tYXRyaXgocmV2ZW51ZSB+IGJ1ZGdldCArIHJ1bnRpbWUgKyB2b3RlX2NvdW50ICsgZ2VucmVfQ3JpbWUgKyBnZW5yZV9EcmFtYSArIGdlbnJlX0FuaW1hdGlvbiArIGdlbnJlX0ZhbWlseSArICsgaG9saWRheV9tb250aDp2b3RlX2NvdW50ICsgdG9wQWN0b3I6dm90ZV9jb3VudCArIHRvcERpcmVjdG9yOnZvdGVfY291bnQgKyB0b3BEaXJlY3RvcjpidWRnZXQgKyBnZW5yZV9BY3Rpb246dm90ZV9jb3VudCArIGdlbnJlX0FkdmVudHVyZTp2b3RlX2NvdW50ICsgZ2VucmVfQ3JpbWU6dm90ZV9jb3VudCArIGdlbnJlX1JvbWFuY2U6dm90ZV9jb3VudCArIGdlbnJlX1NjaWVuY2UuRmljdGlvbjp2b3RlX2NvdW50ICsgZ2VucmVfV2VzdGVybjp2b3RlX2NvdW50ICsgaG9saWRheV9tb250aCArIHZvdGVfYXZlcmFnZTp2b3RlX2NvdW50ICsgYnVkZ2V0OnZvdGVfY291bnQgKyBydW50aW1lOnZvdGVfY291bnQgKyB2b3RlX2NvdW50OnBvcHVsYXJpdHksIHRyYWluKVssLTFdCgp4X3Rlc3QgPSBtb2RlbC5tYXRyaXgocmV2ZW51ZSB+IGJ1ZGdldCArIHJ1bnRpbWUgKyB2b3RlX2NvdW50ICsgZ2VucmVfQ3JpbWUgKyBnZW5yZV9EcmFtYSArIGdlbnJlX0FuaW1hdGlvbiArIGdlbnJlX0ZhbWlseSArICsgaG9saWRheV9tb250aDp2b3RlX2NvdW50ICsgdG9wQWN0b3I6dm90ZV9jb3VudCArIHRvcERpcmVjdG9yOnZvdGVfY291bnQgKyB0b3BEaXJlY3RvcjpidWRnZXQgKyBnZW5yZV9BY3Rpb246dm90ZV9jb3VudCArIGdlbnJlX0FkdmVudHVyZTp2b3RlX2NvdW50ICsgZ2VucmVfQ3JpbWU6dm90ZV9jb3VudCArIGdlbnJlX1JvbWFuY2U6dm90ZV9jb3VudCArIGdlbnJlX1NjaWVuY2UuRmljdGlvbjp2b3RlX2NvdW50ICsgZ2VucmVfV2VzdGVybjp2b3RlX2NvdW50ICsgaG9saWRheV9tb250aCArIHZvdGVfYXZlcmFnZTp2b3RlX2NvdW50ICsgYnVkZ2V0OnZvdGVfY291bnQgKyBydW50aW1lOnZvdGVfY291bnQgKyB2b3RlX2NvdW50OnBvcHVsYXJpdHksIHRlc3QpWywtMV0KCnlfdHJhaW4gPSB0cmFpbiAlPiUKICBkcGx5cjo6c2VsZWN0KHJldmVudWUpICU+JQogIHVubGlzdCgpICU+JQogIGFzLm51bWVyaWMoKQoKeV90ZXN0ID0gdGVzdCAlPiUKICBkcGx5cjo6c2VsZWN0KHJldmVudWUpICU+JQogIHVubGlzdCgpICU+JQogIGFzLm51bWVyaWMoKQpgYGAKCiMjIyMjMy4yLjIuIENyZWF0ZSBhIHNldCBvZiBsYW1iZGEgdmFsdWVzIGFuZCB0cmFpbiB0aGUgbW9kZWwKCmBgYHtyfQpsYW1iZGEgPC0gMTBec2VxKDIsIC0yLCBsZW5ndGggPSAxMDApCmBgYAoKYGBge3J9CnJpZGdlX21vZCA9IGdsbW5ldCh4X3RyYWluLCB5X3RyYWluLCBhbHBoYT0wLCBsYW1iZGEgPSBsYW1iZGEpCnBsb3QocmlkZ2VfbW9kLCB4dmFyPSdsYW1iZGEnLCBsYWJlbCA9IFRSVUUsIG1haW4gPSAiUmlkZ2UgUmVncmVzc2lvbiIpCmBgYAoKYGBge3J9CiNSdW5uaW5nIDEwLWZvbGQgY3Jvc3MgdmFsaWRhdGlvbi4Kc2V0LnNlZWQoMCkKY3YucmlkZ2Uub3V0ID0gY3YuZ2xtbmV0KHhfdHJhaW4sIHlfdHJhaW4sIGxhbWJkYSA9IGxhbWJkYSwgYWxwaGEgPSAwLCBuZm9sZHMgPSAxMCkKcGxvdChjdi5yaWRnZS5vdXQsIG1haW4gPSAiUmlkZ2UgUmVncmVzc2lvblxuIikKYmVzdGxhbWJkYS5yaWRnZSA9IGN2LnJpZGdlLm91dCRsYW1iZGEubWluCmJlc3RsYW1iZGEucmlkZ2UKbG9nKGJlc3RsYW1iZGEucmlkZ2UpCmBgYAoKYGBge3J9CiNXaGF0IGlzIHRoZSB0ZXN0IE1TRSBhc3NvY2lhdGVkIHdpdGggdGhpcyBiZXN0IHZhbHVlIG9mIGxhbWJkYT8KcmlkZ2UuYmVzdGxhbWJkYXRyYWluID0gcHJlZGljdChyaWRnZV9tb2QsIHMgPSBiZXN0bGFtYmRhLnJpZGdlLCBuZXd4ID0geF90ZXN0KQptZWFuKChyaWRnZS5iZXN0bGFtYmRhdHJhaW4gLSB5X3Rlc3QpXjIpCmBgYAoKYGBge3J9CnlfcHJlZGljdGVkIDwtIHByZWRpY3QocmlkZ2VfbW9kLCBzID0gYmVzdGxhbWJkYS5yaWRnZSwgbmV3eCA9IHhfdGVzdCkKCiMgU3VtIG9mIFNxdWFyZXMgVG90YWwgYW5kIEVycm9yCnNzdCA8LSBzdW0oKHlfdGVzdCAtIG1lYW4oeV90ZXN0KSleMikKc3NlIDwtIHN1bSgoeV9wcmVkaWN0ZWQgLSB5X3Rlc3QpXjIpCgojIFIgc3F1YXJlZApyc3FfcmlkZ2UgPC0gMSAtIHNzZSAvIHNzdApyc3FfcmlkZ2UKYGBgCgojIyMjMy4yLiBMYXNzbyBSZWdyZXNzaW9uCgpgYGB7cn0KbGFzc28ubW9kZWxzID0gZ2xtbmV0KHhfdHJhaW4sIHlfdHJhaW4sIGFscGhhID0gMSwgbGFtYmRhID0gbGFtYmRhKQpwbG90KGxhc3NvLm1vZGVscykKYGBgCgoKYGBge3J9CiNSdW5uaW5nIDEwLWZvbGQgY3Jvc3MgdmFsaWRhdGlvbi4Kc2V0LnNlZWQoMCkKY3YubGFzc28ub3V0ID0gY3YuZ2xtbmV0KHhfdHJhaW4sIHlfdHJhaW4sIGxhbWJkYSA9IGxhbWJkYSwgYWxwaGEgPSAxLCBuZm9sZHMgPSAxMCkKcGxvdChjdi5sYXNzby5vdXQsIG1haW4gPSAiTGFzc28gUmVncmVzc2lvblxuIikKYmVzdGxhbWJkYS5sYXNzbyA9IGN2Lmxhc3NvLm91dCRsYW1iZGEubWluCmJlc3RsYW1iZGEubGFzc28KYGBgCgpgYGB7cn0KI1doYXQgaXMgdGhlIHRlc3QgTVNFIGFzc29jaWF0ZWQgd2l0aCB0aGlzIGJlc3QgdmFsdWUgb2YgbGFtYmRhPwpsYXNzby5iZXN0bGFtYmRhdHJhaW4gPSBwcmVkaWN0KGxhc3NvLm1vZGVscywgcyA9IGJlc3RsYW1iZGEubGFzc28sIG5ld3ggPSB4X3Rlc3QpCm1lYW4oKGxhc3NvLmJlc3RsYW1iZGF0cmFpbiAtIHlfdGVzdCleMikKYGBgCgpgYGB7cn0KI0ZpdCB0aGUgbGFzc28gcmVncmVzc2lvbiBvbiB0aGUgdHJhaW4gZGF0YXNldCB1c2luZyB0aGUgYmVzdCBsYW1iZGEgdmFsdWUKI2Zyb20gY3Jvc3MgdmFsaWRhdGlvbjsgaW5zcGVjdCB0aGUgY29lZmZpY2llbnQgZXN0aW1hdGVzLgpsYXNzby5vdXQgPSBnbG1uZXQoeF90cmFpbiwgeV90cmFpbiwgYWxwaGEgPSAxKQpwcmVkaWN0KGxhc3NvLm91dCwgdHlwZSA9ICJjb2VmZmljaWVudHMiLCBzID0gYmVzdGxhbWJkYS5sYXNzbykKYGBgCgpgYGB7cn0KI0xldCdzIGFsc28gaW5zcGVjdCB0aGUgTVNFIG9mIG91ciBmaW5hbCBsYXNzbyBtb2RlbCBvbiBhbGwgb3VyIGRhdGEuCmxhc3NvLmJlc3RsYW1iZGEgPSBwcmVkaWN0KGxhc3NvLm91dCwgcyA9IGJlc3RsYW1iZGEubGFzc28sIG5ld3ggPSB4X3Rlc3QpCm1lYW4oKGxhc3NvLmJlc3RsYW1iZGEgLSB5X3Rlc3QpXjIpCmBgYAoKYGBge3J9CnlfcHJlZGljdGVkIDwtIHByZWRpY3QobGFzc28ub3V0LCBzID0gYmVzdGxhbWJkYS5yaWRnZSwgbmV3eCA9IHhfdGVzdCkKCiMgU3VtIG9mIFNxdWFyZXMgVG90YWwgYW5kIEVycm9yCnNzdCA8LSBzdW0oKHlfdGVzdCAtIG1lYW4oeV90ZXN0KSleMikKc3NlIDwtIHN1bSgoeV9wcmVkaWN0ZWQgLSB5X3Rlc3QpXjIpCgojIFIgc3F1YXJlZApyc3FfbGFzc28gPC0gMSAtIHNzZSAvIHNzdApyc3FfbGFzc28KYGBgCgoKCiMjIyMzLjMuIFJlZ3Jlc3Npb24gVHJlZXMKCmBgYHtyfQojQ3JlYXRlIHRyYWluaW5nIGFuZCB0ZXN0aW5nIGRhdGFzZXRzCmxpYnJhcnkocnBhcnQpCnJlZ1RyZWUgPC0gcnBhcnQocmV2ZW51ZSB+IHZvdGVfY291bnQgKyBidWRnZXQgKyAgaG9saWRheV9tb250aCArIAogICAgdG9wRGlyZWN0b3IgKyAgdG9wQWN0b3IgKyAgcG9wdWxhcml0eSArIGdlbnJlX0ZhbWlseSArIGdlbnJlX0NyaW1lICsgZ2VucmVfQW5pbWF0aW9uICsgCiAgICBnZW5yZV9BZHZlbnR1cmUgKyBnZW5yZV9Sb21hbmNlICsgZ2VucmVfRHJhbWEgKyBnZW5yZV9BY3Rpb24gKyBnZW5yZV9TY2llbmNlLkZpY3Rpb24gKwogICAgcnVudGltZSwgbWV0aG9kPSJhbm92YSIsIGRhdGE9dG1kYjEgKQpgYGAKCmBgYHtyfQpwbG90KHJlZ1RyZWUsIHVuaWZvcm09VFJVRSwgCiBtYWluPSJSZWdyZXNzaW9uIFRyZWUgZm9yIHByZWRpY3RpbmcgUmV2ZW51ZSIpCiB0ZXh0KHJlZ1RyZWUsIHVzZS5uPVRSVUUsIGNleCA9IDAuNikKYGBgCgpgYGB7cn0KbGlicmFyeShycGFydC5wbG90KQpycGFydC5wbG90KHJlZ1RyZWUsZGlnaXRzID0gNCkKYGBgCgoKYGBge3J9CnAucnBhcnQgPC0gcHJlZGljdChyZWdUcmVlLHRtZGIxKQptZWFuKChwLnJwYXJ0LXRtZGIxJHJldmVudWUpXjIpCmBgYAoKYGBge3J9CiMgU3VtIG9mIFNxdWFyZXMgVG90YWwgYW5kIEVycm9yCnNzdCA8LSBzdW0oKHRtZGIxJHJldmVudWUgLSBtZWFuKHRtZGIxJHJldmVudWUpKV4yKQpzc2UgPC0gc3VtKChwLnJwYXJ0IC0gdG1kYjEkcmV2ZW51ZSleMikKCiMgUiBzcXVhcmVkCnJzcV90cmVlIDwtIDEgLSBzc2UgLyBzc3QKcnNxX3RyZWUKYGBgCgojIyMjMy40LiBSYW5kb20gRm9yZXN0IAoKYGBge3J9CnNldC5zZWVkKDQ1KQp0cmFpbi5pbmRleCA8LSBzYW1wbGUocm93Lm5hbWVzKHRtZGIxKSwgZGltKHRtZGIxKVsxXSowLjYpCnRlc3QuaW5kZXggPC0gc2V0ZGlmZihyb3cubmFtZXModG1kYjEpLCB0cmFpbi5pbmRleCkKdHJhaW4gPC0gdG1kYjFbdHJhaW4uaW5kZXgsIF0KdGVzdCA8LSB0bWRiMVt0ZXN0LmluZGV4LCBdCmBgYAoKYGBge3J9CnNldC5zZWVkKDEwMCkKbGlicmFyeShyYW5kb21Gb3Jlc3QpCnJmIDwtIHJhbmRvbUZvcmVzdChyZXZlbnVlIH4gdm90ZV9jb3VudCArIGJ1ZGdldCArIGdlbnJlX0ZhbWlseSArIGhvbGlkYXlfbW9udGggKyAKdG9wRGlyZWN0b3IgK3RvcEFjdG9yICtwb3B1bGFyaXR5ICsgZ2VucmVfQ3JpbWUgKyBnZW5yZV9BbmltYXRpb24gKyAKZ2VucmVfQWR2ZW50dXJlICsgZ2VucmVfUm9tYW5jZSArIGdlbnJlX0RyYW1hICsgZ2VucmVfQWN0aW9uICsgZ2VucmVfU2NpZW5jZS5GaWN0aW9uICsKcnVudGltZSxkYXRhPXRyYWluLG50cmVlPTUwMCkKYGBgCgpgYGB7cn0KcHJlZF9yZiA8LSBwcmVkaWN0KHJmLHRlc3QpCm1lYW4oKHByZWRfcmYgLSB0ZXN0JHJldmVudWUpXjIpCmBgYAoKCmBgYHtyfQojIFN1bSBvZiBTcXVhcmVzIFRvdGFsIGFuZCBFcnJvcgpzc3QgPC0gc3VtKCh0ZXN0JHJldmVudWUgLSBtZWFuKHRlc3QkcmV2ZW51ZSkpXjIpCnNzZSA8LSBzdW0oKHByZWRfcmYgLSB0ZXN0JHJldmVudWUpXjIpCgojIFIgc3F1YXJlZApyc3FfcmYgPC0gMSAtIHNzZSAvIHNzdApyc3FfcmYKYGBgCgpgYGB7cn0KcHJlZF9yZjEgPC0gcHJlZGljdChyZix0bWRiMSkKbWVhbigocHJlZF9yZjEtdG1kYjEkcmV2ZW51ZSleMikKCmBgYAoKCmBgYHtyfQojaW5zdGFsbCByZXB0cmVlIHBhY2thZ2VzCmhhdmUucGFja2FnZXMgPC0gaW5zdGFsbGVkLnBhY2thZ2VzKCkKY3Jhbi5wYWNrYWdlcyA8LSBjKCdkZXZ0b29scycsJ3Bsb3RyaXgnLCdyYW5kb21Gb3Jlc3QnLCd0cmVlJykKdG8uaW5zdGFsbCA8LSBzZXRkaWZmKGNyYW4ucGFja2FnZXMsIGhhdmUucGFja2FnZXNbLDFdKQppZihsZW5ndGgodG8uaW5zdGFsbCk+MCkgaW5zdGFsbC5wYWNrYWdlcyh0by5pbnN0YWxsKQoKbGlicmFyeShkZXZ0b29scykKaWYoISgncmVwcnRyZWUnICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCkpKXsKICBpbnN0YWxsX2dpdGh1YignYXJhYXN0YXQvcmVwcnRyZWUnKQp9CmZvcihwIGluIGMoY3Jhbi5wYWNrYWdlcywgJ3JlcHJ0cmVlJykpIGV2YWwoc3Vic3RpdHV0ZShsaWJyYXJ5KHBrZyksIGxpc3QocGtnPXApKSkKCmBgYAoKYGBge3J9CmxpYnJhcnkocmFuZG9tRm9yZXN0KQpsaWJyYXJ5KHJlcHJ0cmVlKQoKCnJlcHJ0cmVlOjo6cGxvdC5nZXRUcmVlKHJmKQpgYGAKCmBgYHtyfQpzZXQuc2VlZCgxMDApCmxpYnJhcnkocmFuZG9tRm9yZXN0KQpyZl9ub2RlIDwtIHJhbmRvbUZvcmVzdChyZXZlbnVlIH4gdm90ZV9jb3VudCArIGJ1ZGdldCArIGdlbnJlX0ZhbWlseSArIGhvbGlkYXlfbW9udGggKyAKdG9wRGlyZWN0b3IgK3RvcEFjdG9yICtwb3B1bGFyaXR5ICsgZ2VucmVfQ3JpbWUgKyBnZW5yZV9BbmltYXRpb24gKyAKZ2VucmVfQWR2ZW50dXJlICsgZ2VucmVfUm9tYW5jZSArIGdlbnJlX0RyYW1hICsgZ2VucmVfQWN0aW9uICsgZ2VucmVfU2NpZW5jZS5GaWN0aW9uICsKcnVudGltZSxkYXRhPXRyYWluLCBtYXhub2Rlcz01MCwgbnRyZWU9NTAwKQpgYGAKCmBgYHtyfQpyZXBydHJlZTo6OnBsb3QuZ2V0VHJlZShyZl9ub2RlKQpgYGAKCgojIyBDb25jbHVzaW9uCgpQcmVkaWN0aXZlIE1vZGVsICAgIHwgUi1zcXVhcmVkICAgICAgfCBDb21tZW50Ci0tLS0tLS0tLS0tLS0tLS0tLSAgfCAtLS0tLS0tLS0tLS0tICB8IC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQpMaW5lYXIgUmVncmVzc2lvbiAgIHwgMC43NTcxMzA5ICAgICAgfCBMaW5lYXIgcmVncmVzc2lvbiBoYXMgdG9vIG1hbnkgbm9uLWxpbmVhcml0aWVzLiBUaGUgUi1zcXVhcmVkIG9uIGNvbXBsZXRlIGRhdGFzZXQgd2FzIDAuNzg5ClJpZGdlIFJlZ3Jlc3Npb24gICAgfCAwLjc1ODMyNzkgICAgICB8IFJpZGdlIFJlZ3Jlc3Npb24gYWRkIGEgcGVuYWx0eSBvbiBzdW0gb2Ygc3F1YXJlZCBiZXRhLiBUaGlzIGhhcyB0aGUgZWZmZWN0IG9mIOKAnHNocmlua2luZ+KAnSBsYXJnZSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB2YWx1ZXMgb2YgYmV0YSB0b3dhcmRzIHplcm8uIEFzIGEgcmVzdWx0IHRoZSByaWRnZSByZWdyZXNzaW9uIGVzdGltYXRlcyBhcmUgb2Z0ZW4gbW9yZSBhY2N1cmF0ZS4KTGFzc28gUmVncmVzc2lvbiAgICB8IDAuNzQ3Mjg5MSAgICAgIHwgVGhlIExBU1NPIHdvcmtzIGluIGEgc2ltaWxhciB3YXkgdG8gcmlkZ2UgcmVncmVzc2lvbiBleGNlcHQgdGhhdCBpdCB1c2VzIGFuIEwxIHBlbmFsdHkuIExBU1NPIGlzIG5vdCAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcXVpdGUgYXMgY29tcHV0YXRpb25hbCBlZmZpY2llbnQgYXMgcmlkZ2UgcmVncmVzc2lvbi4KUmVncmVzc2lvbiBUcmVlcyAgICB8IDAuNzI2MDYyOSAgICAgIHwgQSBkZWNpc2lvbiB0cmVlIGlzIGJ1aWx0IG9uIGFuIGVudGlyZSBkYXRhc2V0LCB1c2luZyBhbGwgdGhlIGZlYXR1cmVzL3ZhcmlhYmxlcyBvZiBpbnRlcmVzdCwgdXNpbmcgYWxsICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRoZSBmZWF0dXJlcy92YXJpYWJsZXMgb2YgaW50ZXJlc3QgClJhbmRvbSBGcm9lc3RzICAgICAgfCAwLjc4ODY0MTIgICAgICB8IEFjY3VyYWN5IGtlZXBzIGluY3JlYXNpbmcgYXMgeW91IGluY3JlYXNlIHRoZSBudW1iZXIgb2YgdHJlZXMsIGJ1dCBiZWNvbWVzIGNvbnN0YW50IGF0IGNlcnRhaW4gcG9pbnQuICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFVubGlrZSBkZWNpc2lvbiB0cmVlLCBpdCB3b24ndCBjcmVhdGUgaGlnaGx5IGJpYXNlZCBtb2RlbCBhbmQgcmVkdWNlcyB0aGUgdmFyaWFuY2UuCgpCYXNlZCBvbiBvdXIgc2NhbGVkIGRhdGEgb3VyIHRvcCBwcmVkaWN0b3JzIGNoYW5nZWQgaW4gTGluZWFyIGFuZCBSYW5kb20gRm9yZXN0IG1vZGVscy4gSW4gUmFuZG9tIEZvcmVzdCB3ZSBoYXZlIHZvdGVfY291bnQsIHBvcHVsYXJpdHksIGdlbnJlX2FkdmVudHVyZSwgdG9wX2FjdG9yLCBob2xpZGF5X21vbnRoIHRvIGJlIHRvcDUgcHJlZGljdG9ycy4KCldlIGNvbmNsdWRlIHRoYXQgUmFuZG9tIEZvcmVzdHMgaXMgdGhlIGJlc3QgbW9kZWwgdG8gcHJlZGljdCB0aGUgcmV2ZW51ZS4gVGhlIHBvaW50IG9mIFJGIGlzIHRvIHByZXZlbnQgb3ZlcmZpdHRpbmcuIEl0IGRvZXMgdGhpcyBieSBjcmVhdGluZyByYW5kb20gc3Vic2V0cyBvZiB0aGUgZmVhdHVyZXMgYW5kIGJ1aWxkaW5nIHNtYWxsZXIgKHNoYWxsb3cpIHRyZWVzIHVzaW5nIHRoZSBzdWJzZXRzIGFuZCB0aGVuIGl0IGNvbWJpbmVzIHRoZSBzdWJ0cmVlcy4gVGhlIGRvd25zaWRlIG9mIFJGIGlzIGl0IGNhbiBiZSBzbG93IGlmIHlvdSBoYXZlIGEgc2luZ2xlIHByb2Nlc3MgYnV0IGl0IGNhbiBiZSBwYXJhbGxlbGl6ZWQu